### "Allegiance, Ability, and Achievement in the American Civil War:
### Commander Traits and Battlefield Military Effectiveness"

### By Jeffrey B. Arnold, J. Tyson Chatagnier, and Gary E. Hollibaugh, Jr.
##### MAIN CODE.

# Clear workspace
rm(list=ls())

# Load packages
suppressPackageStartupMessages({
  library("foreign")
  library("FactoMineR")
  library("semTools")
  library("MASS")
  library("lme4")
  library("texreg")
  library("ordinal")
  library("ggplot2")
  library("lavaan")
  library("progress")
  # library("dplyr")
  library("lavaan.survey")
  library("MuMIn")
  library("lmtest")
  library("MplusAutomation")
  library("effects")
  # library("plyr")
  library("mvtnorm")
  library("Zelig")
  library("ZeligChoice")
  library("ggtern")
  library("RStata")
  library("rms")
  library("boot")
})

# Set ggplot theme for later
theme_set(theme_bw())


# Indicate folders for data and code
DIR_DATA <- "data"
DIR_CODE <- "code"


# Set Stata path (Needed to convert data file to MPlus)
STATA_PATH <- "/Applications/Stata/StataSE.app/Contents/MacOS/stata-se"


# Set directories for outputs
DIR_OUTPUT <- "out"
if (! dir.exists(DIR_OUTPUT)) dir.create(DIR_OUTPUT)
DIR_FIGURES <- DIR_OUTPUT
if (! dir.exists(DIR_FIGURES)) dir.create(DIR_FIGURES)
DIR_TABLES <- DIR_OUTPUT
if (! dir.exists(DIR_TABLES)) dir.create(DIR_TABLES)


# Utility functions to make HTML tables out of R objects; used to make regression output
source(file.path(DIR_CODE,'ACH-Utilities.R'))


# Load data
FILE_GENERALS <- file.path(DIR_DATA, "ACH-Generals.dta")
FILE_COMMANDERS_DATA_SMALL_STATIC <- file.path(DIR_DATA, "ACH-StaticCommanders.csv")
FILE_BATTLE_COMM_MERGED <- file.path(DIR_DATA, "ACH-NewBattles.rds")


#  Impute missing battle data
suppressWarnings(source(file.path(DIR_CODE, "ACH-BattleImputations.R")))
source(file.path(DIR_CODE, "ACH-UpdatingBattles.R"))


# Read in generals data.
generals <- read.dta(FILE_GENERALS)


# Correcting some minor rank typos
generals$topoverall[generals$topoverall == "1st  Lt."] <- "1st Lt."
generals$topoverall[generals$topoverall == "Col. "] <- "Col."


# Defining the Confederacy and Border states
CSA <- c("SC", "MS", "FL", "AL", "GA", "LA", "TX", "VA", "AR", "NC", "TN")


# Border states
borders <- c("KY", "WV", "MD", "DE", "MO")


# Assigning birth locations and types
generals$southborn <- as.numeric(generals$stateborn %in% CSA)
generals$slaveborn <- as.numeric(generals$stateborn %in% CSA) +
  as.numeric(generals$stateborn %in% borders)
generals$foreignborn <- as.numeric(!(generals $stateborn %in% c("AL", "AR", "CA", "CT", "DC",
                                                                "DE", "FL", "GA", "IL", "IN",
                                                                "IA", "KY", "LA", "ME", "MD",
                                                                "MA", "MI", "MN", "MS", "MO",
                                                                "NH", "NJ", "NY", "NC", "OH",
                                                                "OR", "PA", "RI", "SC", "TN",
                                                                "TX", "VT", "VA", "WI")))
generals$northborn <- 1 - generals$southborn - generals$foreignborn
generals$borderborn <- generals$slaveborn - generals$southborn


# Getting rid of some NAs in the officeholding variables
generals$prevsouthgov <- as.numeric(generals$prevsouthgov != "")
generals$prevnorthgov <- as.numeric(generals$prevnorthgov != "")

generals$prevsouthleg <- as.numeric(generals$prevsouthleg != "")
generals$prevnorthleg <- as.numeric(generals$prevnorthleg != "")

generals$prevsouthcong <- as.numeric(generals$prevsouthcong != "")
generals$prevnorthcong <- as.numeric(generals$prevnorthcong != "")


# Coding text ranks as numerical ranks
generals$topoverall[generals$topoverall == ""] <- "None"
generals$topoverall[generals$topoverall == "Pvt."] <- "Enlisted"
generals$topoverall[generals$topoverall == "Sgt."] <- "Enlisted"
generals$topoverall[generals$topoverall == "2d Lt."] <- "O-1"
generals$topoverall[generals$topoverall == "1st Lt."] <- "O-2"
generals$topoverall[generals$topoverall == "Capt." &
                      generals$army != "C.S.N." &
                      generals$army != "U.S.N."] <- "O-3"
generals$topoverall[generals$topoverall == "Lt."] <- "O-3"
generals$topoverall[generals$topoverall == "Maj."] <- "O-4"
generals$topoverall[generals$topoverall == "Lt. Comdr."] <- "O-4"
generals$topoverall[generals$topoverall == "Lt. Col."] <- "O-5"
generals$topoverall[generals$topoverall == "Comdr."] <- "O-5"
generals$topoverall[generals$topoverall == "Col."] <- "O-6"
generals$topoverall[generals$topoverall == "Capt." &
                      generals$army == "C.S.N."] <- "O-6"
generals$topoverall[generals$topoverall == "Capt." &
                      generals$army == "U.S.N."] <- "O-6"
generals$topoverall[generals$topoverall == "Brig. Gen."] <- "O-7"
generals$topoverall[generals$topoverall == "Comm."] <- "O-7"
generals$topoverall[generals$topoverall == "Rear Adm."] <- "O-8"
generals$topoverall[generals$topoverall == "Maj. Gen."] <- "O-8"
generals$topoverall[generals$topoverall == "Lt. Gen."] <- "O-9"
generals$topoverall[generals$topoverall == "Vice Adm."] <- "O-9"
generals$topoverall[generals$topoverall == "Gen."] <- "O-10"
generals$topoverall[generals$topoverall == "Adm."] <- "O-10"

generals$topoverall <- factor(generals$topoverall, levels = c("None",
                                                              "Enlisted",
                                                              "O-1",
                                                              "O-2",
                                                              "O-3",
                                                              "O-4",
                                                              "O-5",
                                                              "O-6",
                                                              "O-7",
                                                              "O-8",
                                                              "O-9",
                                                              "O-10"))

# Reading in biographical information such as birth dates, etc.
static_info <- read.csv(FILE_COMMANDERS_DATA_SMALL_STATIC)

# Correcting some more NAs --- again, these are simply the lack of ones.
static_info$mexwar[which(is.na(static_info$mexwar))] <- 0
static_info$prewarplanter[which(is.na(static_info$prewarplanter))] <- 0
static_info$otherMA <- as.numeric(static_info$otherMA != "")
static_info$USMA <- as.numeric(static_info$USMA == 1)


# Merging the two "static" and "dynamic" Generals datasets.
colnames(static_info)[1] <- "comm_id"
generals_merge <- merge(generals, static_info, by = "comm_id", all.x = TRUE)


# Previous (side-specific) officeholding experience --- aggregating up.
generals_merge$prevnorthoffice <- as.numeric((generals_merge$prevnorthcong +
                                              generals_merge$prevnorthgov +
                                              generals_merge$prevnorthleg) > 0)

generals_merge$prevsouthoffice <- as.numeric((generals_merge$prevsouthcong +
                                              generals_merge$prevsouthgov +
                                              generals_merge$prevsouthleg) > 0)

generals_merge$prev_office_bias <- generals_merge$prevsouthoffice - generals_merge$prevnorthoffice


# Creating weighted family variables.
generals_merge$weightCSAkin <- generals_merge$numCSArels * generals_merge$avgCSAkin
generals_merge$weightUSAkin <- generals_merge$numUSArels * generals_merge$avgUSAkin
generals_merge$weightCSAkin_inlaws <- generals_merge$numCSAinlaws * generals_merge$avgCSAkin_inlaws
generals_merge$weightUSAkin_inlaws <- generals_merge$numUSAinlaws * generals_merge$avgUSAkin_inlaws
generals_merge$CSA_kinbias <- generals_merge$weightCSAkin - generals_merge$weightUSAkin
generals_merge$CSA_kin_inlawsbias <- generals_merge$weightCSAkin_inlaws - generals_merge$weightUSAkin_inlaws


# Separate flag officer experience variable.
generals_merge$flagofficer_exp <- as.numeric((generals_merge$topoverall == "O-7") +
                                               (generals_merge$topoverall == "O-8") +
                                               (generals_merge$topoverall == "O-9") +
                                               (generals_merge$topoverall == "O-10"))


# Midshipman dummy (precursor of USNA).
generals_merge$passmidship_dummy <- as.numeric(generals_merge$passmidship != "")


# Coding and cleaning partisan affiliations before the war.
generals_merge$prewarpartisan <- sign(apply(cbind(generals_merge$prewaroffice,
                                                  generals_merge$prewarnomination_nooffice,
                                                  generals_merge$prewarconvention),
                                            1, sum, na.rm = TRUE))
generals_merge$prewardemocrat <- as.numeric(generals_merge$lastprewarparty == "Democrat")
generals_merge$prewarrepublican <- as.numeric(generals_merge$lastprewarparty == "Republican")
generals_merge$prewarwhig <- as.numeric(generals_merge$lastprewarparty == "Whig")
generals_merge$ever_free_soil <- as.numeric(generals_merge$otherprewarparty1 == "Free-State" |
                                              generals_merge$otherprewarparty2 == "Free-State" |
                                              generals_merge$otherprewarparty3 == "Free-State" |
                                              generals_merge$lastprewarparty == "Free-State" |
                                              generals_merge$otherprewarparty1 == "Free Soil" |
                                              generals_merge$otherprewarparty2 == "Free Soil" |
                                              generals_merge$otherprewarparty3 == "Free Soil" |
                                              generals_merge$lastprewarparty == "Free Soil")


# Deleting duplicate observations from the merged Generals data.
generals_unique <- unique(generals_merge[,c('comm_id', 'year', 'edate2', 'died_date2',
                                            'yearborn', 'stateborn', 'southborn',
                                            'borderborn',
                                            'foreignborn', 'topoverall',
                                            'flagofficer_exp',
                                            'time', 'command_exp', 
                                            'CSA_kinbias', 'CSA_kin_inlawsbias', 
                                            'USMA', 'passmidship_dummy', 'otherMA',
                                            'prewarplanter',
                                            'mexwar', 'prewarpartisan', 'prewardemocrat',
                                            'prewarrepublican', 'prewarwhig',
                                            'ever_free_soil', 'peaceconvention',
                                            'prewaroffice', 'prev_office_bias')])


# Removing some more NAs (these in question should be zeroes).
generals_unique$prewaroffice[is.na(generals_unique$prewaroffice)] <- 0
generals_unique$peaceconvention[is.na(generals_unique$peaceconvention)] <- 0


# Ensure the ranking variable is an ordered numeric variable.
generals_unique$topoverall <- as.numeric(factor(generals_unique$topoverall, ordered = TRUE))

# Logging time to reduce some of the nonnormality
generals_unique$log_time <- log(generals_unique$time + 1)

# Creating confederate indicator
generals_unique$confed <- 0
generals_unique$confed[grep("CS",generals_unique$comm_id)] <- 1


# There are some instances where there are multiple "updates" between battles that
# don't actually affect the variables of interest, so this pulls out the unique ones.
generals_unique2 <- generals_unique[(generals_unique$topoverall > 2 &
                                       generals_unique$edate2 <= "1865-06-22" &
                                       generals_unique$edate2 >= "1861-04-12" &
                                       generals_unique$edate2 <= generals_unique$died_date2),]


# Remove observations with missing values (b/c these by this point are just ones without dates).
generals_unique2 <- na.omit(generals_unique2)


# Now bring in the battles data
battles <- readRDS(FILE_BATTLE_COMM_MERGED)


# Recode some of the battles variables to better comport with the format of the Generals data.
battles$confed <- as.numeric(battles$combatant == "CS")
battles$battle_results <- 0
battles$battle_results[battles$outcome == "Union" & battles$confed == 0] <- 1
battles$battle_results[battles$outcome == "Confederate" & battles$confed == 1] <- 1
battles$battle_results[battles$outcome == "Union" & battles$confed == 1] <- -1
battles$battle_results[battles$outcome == "Confederate" & battles$confed == 0] <- -1
battles$battle_results <- factor(battles$battle_results, labels = c("Defeat", "Inconclusive", "Victory"))


# Add a new row for every battle the general was involved in; update number of battles at the end
# Dates of promotion don't necessarily match with battle dates
# Will throw some warnings; can ignore
all_gens <- NULL
for(i in 1:length(sort(unique(generals_unique2$comm_id)))){

  # Battle counter
  foo_battle <- 0
  foo_CS_casualties <- 0
  foo_US_casualties <- 0
  foo_CS_casualties_min <- 0
  foo_US_casualties_min <- 0
  foo_CS_casualties_max <- 0
  foo_US_casualties_max <- 0

  # Get commander ID
  foo_id <- sort(unique(generals_unique2$comm_id))[i]

  # Get general rows
  foo_gen <- generals_unique2[generals_unique2$comm_id == foo_id,]

  # Get battle rows
  foo_batt <- battles[battles$comm_id == foo_id,][order(battles[battles$comm_id == foo_id,]$start_date1),]
  if(dim(foo_batt)[1] > 0){
  foo_starts <- as.Date(as.character(foo_batt$start_date))
  foo_ends <- as.Date(as.character(foo_batt$end_date))

  foo_rows <- NULL
  for(j in 1:length(foo_starts)){
    if(suppressWarnings(is.finite(max(which(foo_gen$edate2 <= foo_starts[j]))))){
    suppressWarnings(foo_start_temp <- max(which(foo_gen$edate2 <= foo_starts[j]))) # max(which()) and not which.max to get the maximum INDEX and not the INDEX OF THE MAXIMUM
    foo_start <- foo_gen[foo_start_temp,]
    foo_start$edate2 <- as.Date(foo_starts[j]) - 1
    foo_mid <- which((foo_gen$edate2 < foo_ends[j]) & (foo_gen$edate2 > foo_starts[j]))
    suppressWarnings(foo_end_temp <- max(which(foo_gen$edate2 <= foo_ends[j])))
    foo_end <- foo_gen[foo_end_temp,]
    foo_end$edate2 <- as.Date(foo_ends[j])
    foo_start$numbattles <- foo_battle
    foo_mid
    foo_end$numbattles <- foo_battle + 1
    foo_battle <- foo_battle + 1
    foo_start$battle_results <- foo_batt$battle_results[j]
    foo_start$CS_casualties <- foo_batt$CS_casualties[j]
    foo_start$CS_casualties_min <- foo_batt$casualties_C_min[j]
    foo_start$CS_casualties_max <- foo_batt$casualties_C_max[j]
    foo_start$US_casualties <- foo_batt$US_casualties[j]
    foo_start$US_casualties_min <- foo_batt$casualties_U_min[j]
    foo_start$US_casualties_max <- foo_batt$casualties_U_max[j]
    foo_start$CS_strength <- foo_batt$CS_strength[j]
    foo_start$CS_strength_min <- foo_batt$strength_C_min[j]
    foo_start$CS_strength_max <- foo_batt$strength_C_max[j]
    foo_start$US_strength <- foo_batt$US_strength[j]
    foo_start$US_strength_min <- foo_batt$strength_U_min[j]
    foo_start$US_strength_max <- foo_batt$strength_U_max[j]
    foo_start$battle <- foo_batt$battle[j]
    foo_start$battle_name <- foo_batt$battle_name[j]
    foo_start$time <- foo_start$time + (as.numeric(format(foo_start$edate2, "%Y")) - foo_start$year)
    foo_start$year <- as.numeric(format(foo_start$edate2, "%Y"))
    foo_end$time <- foo_end$time + (as.numeric(format(foo_end$edate2, "%Y")) - foo_end$year)
    foo_end$year <- as.numeric(format(foo_end$edate2, "%Y"))
    foo_rows <- rbind(foo_rows, foo_start)
    }
  }
  all_gens <- rbind(all_gens, foo_rows)}
}

### # Creating some weighting variables for the clustering in the MIMIC models
### # numobs_frame <- data.frame(1/table(all_gens$comm_id))
### # colnames(numobs_frame) <- c("comm_id", "weights")
### # all_gens <- merge(all_gens, numobs_frame)

# 1860 Presidential Vote variables
all_gens$lincolnvote <- NA
all_gens$lincolnvote <- 0*(all_gens$stateborn == "AL") +
                        0*(all_gens$stateborn == "AR") +
                        0.323*(all_gens$stateborn == "CA") +
                        0.581*(all_gens$stateborn == "CT") +
                        0.237*(all_gens$stateborn == "DE") +
                        0*(all_gens$stateborn == "FL") +
                        0*(all_gens$stateborn == "GA") +
                        0.507*(all_gens$stateborn == "IL") +
                        0.511*(all_gens$stateborn == "IN") +
                        0.546*(all_gens$stateborn == "IA") +
                        0.009*(all_gens$stateborn == "KY") +
                        0*(all_gens$stateborn == "LA") +
                        0.622*(all_gens$stateborn == "ME") +
                        0.025*(all_gens$stateborn == "MD") +
                        0.629*(all_gens$stateborn == "MA") +
                        0.572*(all_gens$stateborn == "MI") +
                        0.634*(all_gens$stateborn == "MN") +
                        0*(all_gens$stateborn == "MS") +
                        0.103*(all_gens$stateborn == "MO") +
                        0.569*(all_gens$stateborn == "NH") +
                        0.481*(all_gens$stateborn == "NJ") +
                        0.537*(all_gens$stateborn == "NY") +
                        0*(all_gens$stateborn == "NC") +
                        0.523*(all_gens$stateborn == "OH") +
                        0.361*(all_gens$stateborn == "OR") +
                        0.563*(all_gens$stateborn == "PA") +
                        0.614*(all_gens$stateborn == "RI") +
                        0*(all_gens$stateborn == "SC") +
                        0*(all_gens$stateborn == "TN") +
                        0*(all_gens$stateborn == "TX") +
                        0.757*(all_gens$stateborn == "VT") +
                        0.011*(all_gens$stateborn == "VA") +
                        0.566*(all_gens$stateborn == "WI")

all_gens$lincolnvote[all_gens$stateborn == "VA"] <- 0.011 # some odd behavior with Virginia
all_gens$lincolnvote[all_gens$foreignborn == 1] <- 0.399 # foreign-born individuals get the national mean
all_gens$native_lincoln <- (1-all_gens$foreignborn)*all_gens$lincolnvote


# Casualties/victories for commanders.
all_gens2 <- all_gens
all_gens2$cumCS_strength <- NA
all_gens2$cumCS_strength_min <- NA
all_gens2$cumCS_strength_max <- NA
all_gens2$cumUS_strength <- NA
all_gens2$cumUS_strength_min <- NA
all_gens2$cumUS_strength_max <- NA
all_gens2$cumCS_casualties <- NA
all_gens2$cumCS_casualties_min <- NA
all_gens2$cumCS_casualties_max <- NA
all_gens2$cumUS_casualties <- NA
all_gens2$cumUS_casualties_min <- NA
all_gens2$cumUS_casualties_max <- NA
all_gens2$cumvictories <- NA


# Cumulative strength/casualties/victories/etc.
for(i in 1:length(unique(all_gens$comm_id))){
  foo <- all_gens[all_gens$comm_id == unique(all_gens$comm_id)[i],]
  all_gens2[all_gens$comm_id == unique(all_gens$comm_id)[i],]$cumCS_strength <- cumsum(c(0, foo$CS_strength)[-(length(foo$CS_strength) + 1)])
  all_gens2[all_gens$comm_id == unique(all_gens$comm_id)[i],]$cumCS_strength_min <- cumsum(c(0, foo$CS_strength_min)[-(length(foo$CS_strength_min) + 1)])
  all_gens2[all_gens$comm_id == unique(all_gens$comm_id)[i],]$cumCS_strength_max <- cumsum(c(0, foo$CS_strength_max)[-(length(foo$CS_strength_max) + 1)])
  all_gens2[all_gens$comm_id == unique(all_gens$comm_id)[i],]$cumUS_strength <- cumsum(c(0, foo$US_strength)[-(length(foo$US_strength) + 1)])
  all_gens2[all_gens$comm_id == unique(all_gens$comm_id)[i],]$cumUS_strength_min <- cumsum(c(0, foo$US_strength_min)[-(length(foo$US_strength_min) + 1)])
  all_gens2[all_gens$comm_id == unique(all_gens$comm_id)[i],]$cumUS_strength_max <- cumsum(c(0, foo$US_strength_max)[-(length(foo$US_strength_max) + 1)])
  all_gens2[all_gens$comm_id == unique(all_gens$comm_id)[i],]$cumCS_casualties <- cumsum(c(0, foo$CS_casualties)[-(length(foo$CS_casualties) + 1)])
  all_gens2[all_gens$comm_id == unique(all_gens$comm_id)[i],]$cumCS_casualties_min <- cumsum(c(0, foo$CS_casualties_min)[-(length(foo$CS_casualties_min) + 1)])
  all_gens2[all_gens$comm_id == unique(all_gens$comm_id)[i],]$cumCS_casualties_max <- cumsum(c(0, foo$CS_casualties_max)[-(length(foo$CS_casualties_max) + 1)])
  all_gens2[all_gens$comm_id == unique(all_gens$comm_id)[i],]$cumUS_casualties <- cumsum(c(0, foo$US_casualties)[-(length(foo$US_casualties) + 1)])
  all_gens2[all_gens$comm_id == unique(all_gens$comm_id)[i],]$cumUS_casualties_min <- cumsum(c(0, foo$US_casualties_min)[-(length(foo$US_casualties_min) + 1)])
  all_gens2[all_gens$comm_id == unique(all_gens$comm_id)[i],]$cumUS_casualties_max <- cumsum(c(0, foo$US_casualties_max)[-(length(foo$US_casualties_max) + 1)])
  all_gens2[all_gens$comm_id == unique(all_gens$comm_id)[i],]$cumvictories <- cumsum(c(0, (foo$battle_results == "Victory"))[-(length(foo$battle_results) + 1)])
}

all_gens2$cumoppcasualties <- (all_gens2$cumUS_casualties/all_gens2$cumUS_strength)*as.numeric(all_gens2$confed == 1) +
                              (all_gens2$cumCS_casualties/all_gens2$cumCS_strength)*as.numeric(all_gens2$confed == 0)
all_gens2$cumoppcasualties_min <- (all_gens2$cumUS_casualties_min/all_gens2$cumUS_strength_min)*as.numeric(all_gens2$confed == 1) +
                                  (all_gens2$cumCS_casualties_min/all_gens2$cumCS_strength_min)*as.numeric(all_gens2$confed == 0)
all_gens2$cumoppcasualties_max <- (all_gens2$cumUS_casualties_max/all_gens2$cumUS_strength_max)*as.numeric(all_gens2$confed == 1) +
                                  (all_gens2$cumCS_casualties_max/all_gens2$cumCS_strength_max)*as.numeric(all_gens2$confed == 0)
all_gens2$cumoppcasualties[is.na(all_gens2$cumoppcasualties)] <- 0
all_gens2$cumoppcasualties_min[is.na(all_gens2$cumoppcasualties_min)] <- 0
all_gens2$cumoppcasualties_max[is.na(all_gens2$cumoppcasualties_max)] <- 0
all_gens2$winpct <- all_gens2$cumvictories/all_gens2$numbattles
all_gens2$winpct[is.na(all_gens2$winpct)] <- 0
all_gens2$log_cumoppcasualties <- log(all_gens2$cumoppcasualties + 1)
all_gens2$log_cumoppcasualties_min <- log(all_gens2$cumoppcasualties_min + 1)
all_gens2$log_cumoppcasualties_max <- log(all_gens2$cumoppcasualties_max + 1)
all_gens2$log_winpct <- log(all_gens2$winpct + 1)


# Generate weights for later analysis.
all_gens2$weights <- NA
all_gens2$numobs <- NA
for(i in 1:length(unique(all_gens$comm_id))){
  foo <- all_gens[all_gens$comm_id == unique(all_gens$comm_id)[i],]
  all_gens2[all_gens$comm_id == unique(all_gens$comm_id)[i],]$numobs <- dim(all_gens[all_gens$comm_id == unique(all_gens$comm_id)[i],])[1]
}

all_gens2$weights <- 1/all_gens2$numobs
all_gens2$weights <- all_gens2$weights*(length(all_gens2$weights)/
                                          sum(all_gens2$weights))


# Confederate partisanship factor
all_gens2$confed_partisanship <- -PCA(all_gens2[,c("prewarrepublican",
                                                   "prewarwhig",
                                                   "prewardemocrat",
                                                   "ever_free_soil")],
                                      row.w= all_gens2$weights,
                                      graph = FALSE)$ind$coord[,"Dim.1"]
all_gens <- all_gens2


# Save as Stata file to convert to MPlus
write.dta(all_gens,file.path(DIR_DATA, "ACH-GeneralsMerged.dta"))  


# Run some outside programs
# Set up Stata for use in R, and make sure Stata is installed.
# Need to use Stata 15.
options("RStata.StataPath" = STATA_PATH)
options("RStata.StataVersion" = 15)
chooseStataBin()

# Install stata2mplus
stata("net install https://stats.idre.ucla.edu/stat/stata/ado/analysis/stata2mplus")

# Write .do file that will be based on the current directory structure
unlink(file.path(DIR_CODE,"ACH-Conversion.do"))
statadir <- paste(getwd(),"/",file.path(DIR_DATA), sep = "")
cat('use "',statadir, '/ACH-GeneralsMerged.dta", clear', "\n", sep = "", 
    file = file.path(DIR_CODE,"ACH-Conversion.do"), append = T)
cat('stata2mplus using "',statadir, '/ACH", replace', 
    "\n", sep = "",
    file = file.path(DIR_CODE,"ACH-Conversion.do"), append = T)

# Convert ACH-GeneralsMerged.dta to MPlus format.
stata(file.path(DIR_CODE,"ACH-Conversion.do"))

# Append the correct directory to the MPlus file headers and delete potentially "wrong" directoried files
unlink("header.txt")
unlink(file.path(DIR_CODE,"ACH-MIMIC-NoBootstrap.inp"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-NoBootstrapMin.inp"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-NoBootstrapMax.inp"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-Bootstrap.inp"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-BootstrapMin.inp"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-BootstrapMax.inp"))

# Delete old output files
unlink(file.path(DIR_CODE,"ACH-MIMIC-NoBootstrap.out"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-NoBootstrapMin.out"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-NoBootstrapMax.out"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-Bootstrap.out"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-BootstrapMin.out"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-BootstrapMax.out"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-NoBootstrap.sav"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-NoBootstrapMin.sav"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-NoBootstrapMax.sav"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-Bootstrap.sav"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-BootstrapMin.sav"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-BootstrapMax.sav"))

# Create header file and append to the top of the "No Directory" MPlus files
# This is so the user can run the MPlus models without manually editing each model file
cat("Data:\n","  File is ",statadir,"/ACH.dat;\n",sep="",file="header.txt")
writeLines(c(readLines("header.txt"), readLines("ACH-MIMIC-NoBootstrap-NoDir.inp")), file.path(DIR_CODE,"ACH-MIMIC-NoBootstrap.inp"))
writeLines(c(readLines("header.txt"), readLines("ACH-MIMIC-NoBootstrapMin-NoDir.inp")), file.path(DIR_CODE,"ACH-MIMIC-NoBootstrapMin.inp"))
writeLines(c(readLines("header.txt"), readLines("ACH-MIMIC-NoBootstrapMax-NoDir.inp")), file.path(DIR_CODE,"ACH-MIMIC-NoBootstrapMax.inp"))
writeLines(c(readLines("header.txt"), readLines("ACH-MIMIC-Bootstrap-NoDir.inp")), file.path(DIR_CODE,"ACH-MIMIC-Bootstrap.inp"))
writeLines(c(readLines("header.txt"), readLines("ACH-MIMIC-BootstrapMin-NoDir.inp")), file.path(DIR_CODE,"ACH-MIMIC-BootstrapMin.inp"))
writeLines(c(readLines("header.txt"), readLines("ACH-MIMIC-BootstrapMax-NoDir.inp")), file.path(DIR_CODE,"ACH-MIMIC-BootstrapMax.inp"))

# Run the MPlus models in the code directory.
# Make sure MPlus is installed.
suppressWarnings(try(runModels(file.path(paste(getwd()),DIR_CODE))))

# Load in the MPlus scores
# Load in the models (some of the fit statistics are in the non-bootstrapped model)
mplusboot <- readModels(target = file.path(DIR_CODE, "ACH-MIMIC-Bootstrap.out"))
mplusnoboot <- readModels(target = file.path(DIR_CODE, "ACH-MIMIC-NoBootstrap.out"))

# Next, add competence to the data frame
all_gens$comp <- mplusboot$savedata$COMPETENCE
all_gens$comp <- as.numeric(scale(all_gens$comp))

# Rescale the "confederate loyalty" trait to be one of "same side loyalty"
# Do this by taking the negative of the measure for union generals
all_gens$loyal <- mplusboot$savedata$CSALOYALTY
all_gens$loyal <- as.numeric(all_gens$loyal)
all_gens$newloyal <- as.numeric(scale((all_gens$confed == 1)*scale(all_gens$loyal) -
                                (all_gens$confed == 0)*scale(all_gens$loyal)))


# Merge the Generals data with the battle-level data, remove some extraneous variables,
# and create some additional variables
battles$edate2 <- as.Date(battles$start_date) - 1

all_gens_trim <- subset(all_gens, select = -c(US_casualties,
                                              CS_casualties,
                                              US_strength,
                                              CS_strength,
                                              battle_results,
                                              battle))

battles_trim <- subset(battles, select = -c(confed))
battles_gen <- merge(all_gens_trim, battles_trim, by = c("edate2", "comm_id"))


battles_gen$same_side <- as.numeric(battles_gen$confed == battles_gen$confed_battle)
battles_gen$same_state <- as.numeric(battles_gen$stateborn == as.character(battles_gen$state))
battles_gen$strength_ratio <- (battles_gen$US_strength/battles_gen$CS_strength)*(1 - battles_gen$confed) +
  (battles_gen$CS_strength/battles_gen$US_strength)*(battles_gen$confed)


# Remove some duplicate battle-commander dyads
battles_gen_unique <- battles_gen[as.numeric(rownames(unique(battles_gen[,c('battle','comm_id')]))),]


# Create battle-level data (loyalty/competence/fixed effects)
battles_merged_singleentry <- NULL
genFE <- NULL
for(i in 1:length(unique(battles_gen_unique$battle))){
  foo <- battles_gen_unique[battles_gen_unique$battle == unique(battles_gen_unique$battle)[i],]
  numgen <- dim(foo)[1]
  suppressWarnings(max_confed_comp <- max(foo$comp[foo$confed == 1], na.rm = TRUE))
  suppressWarnings(max_confed_loyal <- max(foo$newloyal[foo$confed == 1], na.rm = TRUE))
  suppressWarnings(max_union_comp <- max(foo$comp[foo$confed == 0], na.rm = TRUE))
  suppressWarnings(max_union_loyal <- max(foo$newloyal[foo$confed == 0], na.rm = TRUE))
  suppressWarnings(min_confed_comp <- min(foo$comp[foo$confed == 1], na.rm = TRUE))
  suppressWarnings(min_confed_loyal <- min(foo$newloyal[foo$confed == 1], na.rm = TRUE))
  suppressWarnings(min_union_comp <- min(foo$comp[foo$confed == 0], na.rm = TRUE))
  suppressWarnings(min_union_loyal <- min(foo$newloyal[foo$confed == 0], na.rm = TRUE))
  confed_comp <- mean(foo$comp[foo$confed == 1], na.rm = TRUE)
  confed_loyal <- mean(foo$newloyal[foo$confed == 1], na.rm = TRUE)
  union_comp <- mean(foo$comp[foo$confed == 0], na.rm = TRUE)
  union_loyal <- mean(foo$newloyal[foo$confed == 0], na.rm = TRUE)
  confed_loyal_crossover <- mean(foo$newloyal[foo$confed == 1 & !(foo$stateborn %in% CSA)], na.rm = TRUE)
  union_loyal_crossover <- mean(foo$newloyal[foo$confed == 0 & foo$stateborn %in% CSA], na.rm = TRUE)

    battles_merged_singleentry <- rbind(battles_merged_singleentry,
                                      data.frame(foo,
                                                 confed_comp, confed_loyal,
                                                 union_comp, union_loyal,
                                                 confed_loyal_crossover, union_loyal_crossover,
                                                 max_confed_comp, max_confed_loyal,
                                                 max_union_comp, max_union_loyal,
                                                 min_confed_comp, min_confed_loyal,
                                                 min_union_comp, min_union_loyal,
                                                 numgen))
  genFE <- rbind(genFE, as.numeric(sort(unique(battles_gen_unique$comm_id)) %in%
                                     unique(foo$comm_id)))
}


# Drop duplicate battles
battles_merged_singleentry <- battles_merged_singleentry[!duplicated(battles_merged_singleentry[,'battle']),]

# NAs for battles where loyalty/competence information is missing
battles_merged_singleentry$max_confed_comp[!is.finite(battles_merged_singleentry$max_confed_comp)] <- NA
battles_merged_singleentry$max_confed_loyal[!is.finite(battles_merged_singleentry$max_confed_loyal)] <- NA
battles_merged_singleentry$max_union_comp[!is.finite(battles_merged_singleentry$max_union_comp)] <- NA
battles_merged_singleentry$max_union_loyal[!is.finite(battles_merged_singleentry$max_union_loyal)] <- NA
battles_merged_singleentry$min_confed_comp[!is.finite(battles_merged_singleentry$min_confed_comp)] <- NA
battles_merged_singleentry$min_confed_loyal[!is.finite(battles_merged_singleentry$min_confed_loyal)] <- NA
battles_merged_singleentry$min_union_comp[!is.finite(battles_merged_singleentry$min_union_comp)] <- NA
battles_merged_singleentry$min_union_loyal[!is.finite(battles_merged_singleentry$min_union_loyal)] <- NA
battles_merged_singleentry$confed_comp[!is.finite(battles_merged_singleentry$confed_comp)] <- NA
battles_merged_singleentry$confed_loyal[!is.finite(battles_merged_singleentry$confed_loyal)] <- NA
battles_merged_singleentry$union_comp[!is.finite(battles_merged_singleentry$union_comp)] <- NA
battles_merged_singleentry$union_loyal[!is.finite(battles_merged_singleentry$union_loyal)] <- NA
battles_merged_singleentry$confed_loyal_crossover[!is.finite(battles_merged_singleentry$confed_loyal_crossover)] <- 0
battles_merged_singleentry$union_loyal_crossover[!is.finite(battles_merged_singleentry$union_loyal_crossover)] <- 0


# Generals fixed effects
colnames(genFE) <- sort(unique(battles_gen_unique$comm_id))

# Separate out those generals who fought in more than six battles 
# (Makes for easier convergence in ordered logit models)
genFE.multiple <- genFE[, apply(genFE,2,sum) > 6]

# Rescale strength to be in 1000s (useful for convergence/interpretation when used as a covariate)
battles_merged_singleentry$CS_strength_1000 <- battles_merged_singleentry$CS_strength/1000
battles_merged_singleentry$US_strength_1000 <- battles_merged_singleentry$US_strength/1000

# Incorporate fixed effects into the data frame
battles_merged_singleentry$genFE.multiple <- genFE.multiple

# Ensure the battle outcome DV is a factor
battles_merged_singleentry$outcome <- factor(battles_merged_singleentry$outcome)


### Estimate battle-specific models
# Sink and bootcov2 to prevent a bunch of annoying output from bootcov
sink("NULL")
bootcov2 <- function(...){suppressWarnings(bootcov(...))}

# Set replication seed
set.seed(10)

# First, estimate ordered logit based on outcomes
# "Standard" models for the main paper 
BS.mod.0 <- lrm(outcome ~ 1, data = battles_merged_singleentry, x = TRUE, y = TRUE)
BS.mod.1 <- bootcov2(lrm(outcome ~ confed_comp + union_comp + confed_loyal + union_loyal,
                         data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
BS.mod.2 <- bootcov2(lrm(outcome ~ confed_comp + union_comp + confed_loyal + union_loyal +
                         CS_strength_1000 + US_strength_1000,
                         data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
BS.mod.3 <- bootcov2(lrm(outcome ~ confed_comp + union_comp + confed_loyal + union_loyal +
                         CS_strength_1000 + US_strength_1000 + confed_battle,
                         data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
BS.mod.4 <- bootcov2(lrm(outcome ~ confed_comp + union_comp +
                         (confed_loyal + union_loyal)*confed_battle +
                         CS_strength_1000 + US_strength_1000,
                         data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
BS.mod.5 <- bootcov2(lrm(outcome ~ confed_comp + union_comp +
                         (confed_loyal + union_loyal)*confed_battle +
                         CS_strength_1000 + US_strength_1000 + attacker,
                         data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
BS.mod.6 <- bootcov2(lrm(outcome ~ confed_comp + union_comp +
                         (confed_loyal + union_loyal)*confed_battle +
                         CS_strength_1000 + US_strength_1000 + attacker + genFE.multiple,
                         data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)

# "Crossover" models for Appendix C
BS.mod.1.cross <- bootcov2(lrm(outcome ~ confed_comp + union_comp + confed_loyal + union_loyal +
                               confed_loyal_crossover + union_loyal_crossover,
                               data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
BS.mod.2.cross <-  bootcov2(lrm(outcome ~ confed_comp + union_comp + confed_loyal + union_loyal +
                                confed_loyal_crossover + union_loyal_crossover +
                                CS_strength_1000 + US_strength_1000,
                                data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
BS.mod.3.cross <- bootcov2(lrm(outcome ~ confed_comp + union_comp + confed_loyal + union_loyal +
                               confed_loyal_crossover + union_loyal_crossover +
                               CS_strength_1000 + US_strength_1000 + confed_battle,
                               data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
BS.mod.4.cross <-  bootcov2(lrm(outcome ~ confed_comp + union_comp +
                                (confed_loyal + union_loyal)*confed_battle +
                                (confed_loyal_crossover + union_loyal_crossover)*confed_battle +
                                CS_strength_1000 + US_strength_1000,
                                data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
BS.mod.5.cross <- bootcov2(lrm(outcome ~ confed_comp + union_comp +
                               (confed_loyal + union_loyal)*confed_battle +
                               (confed_loyal_crossover + union_loyal_crossover)*confed_battle +
                               CS_strength_1000 + US_strength_1000 + attacker,
                               data= battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
BS.mod.6.cross <- bootcov2(lrm(outcome ~ confed_comp + union_comp +
                               (confed_loyal + union_loyal)*confed_battle +
                               (confed_loyal_crossover + union_loyal_crossover)*confed_battle +
                               CS_strength_1000 + US_strength_1000 + attacker + genFE.multiple,
                               data= battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)

# Models using the minimum levels of loyalty and competence for Appendix C
BS.mod.1.min <- bootcov2(lrm(outcome ~ min_confed_comp + min_union_comp +
                             min_confed_loyal + min_union_loyal,
                             data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
BS.mod.2.min <- bootcov2(lrm(outcome ~ min_confed_comp + min_union_comp + min_confed_loyal + min_union_loyal +
                             CS_strength_1000 + US_strength_1000,
                             data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
BS.mod.3.min <- bootcov2(lrm(outcome ~ min_confed_comp + min_union_comp + min_confed_loyal + min_union_loyal +
                             CS_strength_1000 + US_strength_1000 + confed_battle,
                             data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
BS.mod.4.min <- bootcov2(lrm(outcome ~ min_confed_comp + min_union_comp + (min_confed_loyal + min_union_loyal)*confed_battle +
                             CS_strength_1000 + US_strength_1000,
                             data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
BS.mod.5.min <- bootcov2(lrm(outcome ~ min_confed_comp + min_union_comp + (min_confed_loyal + min_union_loyal)*confed_battle +
                             CS_strength_1000 + US_strength_1000 + attacker,
                             data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
BS.mod.6.min <- bootcov2(lrm(outcome ~ min_confed_comp + min_union_comp + (min_confed_loyal + min_union_loyal)*confed_battle +
                             CS_strength_1000 + US_strength_1000 + attacker + genFE.multiple,
                             data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)

BS.mod.1.max <- bootcov2(lrm(outcome ~ max_confed_comp + max_union_comp + 
                             max_confed_loyal + max_union_loyal,
                             data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
BS.mod.2.max <- bootcov2(lrm(outcome ~ max_confed_comp + max_union_comp + 
                             max_confed_loyal + max_union_loyal +
                             CS_strength_1000 + US_strength_1000,
                             data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
BS.mod.3.max <- bootcov2(lrm(outcome ~ max_confed_comp + max_union_comp + 
                             max_confed_loyal + max_union_loyal +
                             CS_strength_1000 + US_strength_1000+ confed_battle,
                             data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
BS.mod.4.max <- bootcov2(lrm(outcome ~ max_confed_comp + max_union_comp + 
                             (max_confed_loyal + max_union_loyal)*confed_battle +
                             CS_strength_1000 + US_strength_1000,
                             data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
BS.mod.5.max <- bootcov2(lrm(outcome ~ max_confed_comp + max_union_comp + 
                             (max_confed_loyal + max_union_loyal)*confed_battle +
                             CS_strength_1000 + US_strength_1000 + attacker,
                             data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
BS.mod.6.max <- bootcov2(lrm(outcome ~ max_confed_comp + max_union_comp + 
                             (max_confed_loyal + max_union_loyal)*confed_battle +
                             CS_strength_1000 + US_strength_1000 + attacker + genFE.multiple,
                             data = battles_merged_singleentry, x = TRUE, y = TRUE), B = 1000)
sink()
# Casualty models
# Can't do quasibinomial with rms, so use glm(); 
# First estimate the normal model for the estimates and fit stats
# Then estimate the bootstrapped covariance martrix
LER.mod.0 <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                    confed_comp - confed_comp + union_comp - union_comp +
                    confed_loyal - confed_loyal + union_loyal - union_loyal,
                    data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))
LER.mod.1 <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                    confed_comp + union_comp + confed_loyal + union_loyal,
                    data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))
LER.mod.2 <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                    confed_comp + union_comp + confed_loyal + union_loyal +
                    CS_strength_1000 + US_strength_1000,
                    data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))
LER.mod.3 <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                    confed_comp + union_comp + confed_loyal + union_loyal +
                    CS_strength_1000 + US_strength_1000 + confed_battle,
                    data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))
LER.mod.4 <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                    confed_comp + union_comp + (confed_loyal + union_loyal)*confed_battle +
                    CS_strength_1000 + US_strength_1000,
                    data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))
LER.mod.5 <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                    confed_comp + union_comp + (confed_loyal + union_loyal)*confed_battle +
                    CS_strength_1000 + US_strength_1000 + attacker,
                    data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))
LER.mod.6 <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                    confed_comp + union_comp + (confed_loyal + union_loyal)*confed_battle +
                    CS_strength_1000 + US_strength_1000 + attacker + genFE.multiple,
                    data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))

# Crossover models for Appendix C
LER.mod.1.cross <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                          confed_comp + union_comp +
                          confed_loyal + union_loyal +
                          confed_loyal_crossover + union_loyal_crossover,
                          data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))

LER.mod.2.cross <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                          confed_comp + union_comp +
                          confed_loyal + union_loyal +
                          confed_loyal_crossover + union_loyal_crossover +
                          CS_strength_1000 + US_strength_1000,
                          data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))

LER.mod.3.cross <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                          confed_comp + union_comp +
                          confed_loyal + union_loyal +
                          confed_loyal_crossover + union_loyal_crossover +
                          CS_strength_1000 + US_strength_1000 + confed_battle,
                          data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))

LER.mod.4.cross <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                          confed_comp + union_comp +
                          (confed_loyal + union_loyal)*confed_battle +
                          (confed_loyal_crossover + union_loyal_crossover)*confed_battle +
                          CS_strength_1000 + US_strength_1000,
                          data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))

LER.mod.5.cross <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                          confed_comp + union_comp +
                          (confed_loyal + union_loyal)*confed_battle +
                          (confed_loyal_crossover + union_loyal_crossover)*confed_battle +
                          CS_strength_1000 + US_strength_1000 + attacker,
                          data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))

LER.mod.6.cross <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                          confed_comp + union_comp +
                          (confed_loyal + union_loyal)*confed_battle +
                          (confed_loyal_crossover + union_loyal_crossover)*confed_battle +
                          CS_strength_1000 + US_strength_1000 + attacker + genFE.multiple,
                          data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))


# Models using the minimum and maximum of the estimated traits; for Appendix C
LER.mod.1.min <- glm(cbind(round(CS_casualties), round(US_casualties)) ~
                        min_confed_comp + min_union_comp + min_confed_loyal + min_union_loyal,
                        data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))
LER.mod.2.min <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                        min_confed_comp + min_union_comp + min_confed_loyal + min_union_loyal +
                        CS_strength_1000 + US_strength_1000,
                        data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))
LER.mod.3.min <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                        min_confed_comp + min_union_comp + min_confed_loyal + min_union_loyal +
                        CS_strength_1000 + US_strength_1000 + confed_battle,
                        data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))
LER.mod.4.min <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                        min_confed_comp + min_union_comp + (min_confed_loyal + min_union_loyal)*confed_battle +
                        CS_strength_1000 + US_strength_1000,
                        data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))
LER.mod.5.min <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                        min_confed_comp + min_union_comp + (min_confed_loyal + min_union_loyal)*confed_battle +
                        CS_strength_1000 + US_strength_1000 + attacker,
                        data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))
LER.mod.6.min <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                        min_confed_comp + min_union_comp + (min_confed_loyal + min_union_loyal)*confed_battle +
                        CS_strength_1000 + US_strength_1000 + attacker + genFE.multiple,
                        data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))

LER.mod.1.max <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                        max_confed_comp + max_union_comp + max_confed_loyal + max_union_loyal,
                        data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))
LER.mod.2.max <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                        max_confed_comp + max_union_comp + max_confed_loyal + max_union_loyal +
                        CS_strength_1000 + US_strength_1000,
                        data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))
LER.mod.3.max <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                        max_confed_comp + max_union_comp + max_confed_loyal + max_union_loyal +
                        CS_strength_1000 + US_strength_1000 + confed_battle,
                        data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))
LER.mod.4.max <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                        max_confed_comp + max_union_comp + (max_confed_loyal + max_union_loyal)*confed_battle +
                        CS_strength_1000 + US_strength_1000,
                        data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))
LER.mod.5.max <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                        max_confed_comp + max_union_comp + (max_confed_loyal + max_union_loyal)*confed_battle +
                        CS_strength_1000 + US_strength_1000 + attacker,
                        data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))
LER.mod.6.max <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                        max_confed_comp + max_union_comp + (max_confed_loyal + max_union_loyal)*confed_battle +
                        CS_strength_1000 + US_strength_1000 + attacker + genFE.multiple,
                        data = battles_merged_singleentry,  family = quasibinomial(link = "logit"))


# Bootstrapping the quasibinomial models
set.seed(10)
LER.mod.1.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.1, statistic= mod.boot, R=1000)
LER.mod.2.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.2, statistic= mod.boot, R=1000)
LER.mod.3.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.3, statistic= mod.boot, R=1000)
LER.mod.4.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.4, statistic= mod.boot, R=1000)
LER.mod.5.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.5, statistic= mod.boot, R=1000)
LER.mod.6.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.6, statistic= mod.boot, R=1000)
LER.mod.1.cross.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.1.cross, statistic= mod.boot, R=1000)
LER.mod.2.cross.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.2.cross, statistic= mod.boot, R=1000)
LER.mod.3.cross.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.3.cross, statistic= mod.boot, R=1000)
LER.mod.4.cross.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.4.cross, statistic= mod.boot, R=1000)
LER.mod.5.cross.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.5.cross, statistic= mod.boot, R=1000)
LER.mod.6.cross.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.6.cross, statistic= mod.boot, R=1000)
LER.mod.1.min.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.1.min, statistic= mod.boot, R=1000)
LER.mod.2.min.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.2.min, statistic= mod.boot, R=1000)
LER.mod.3.min.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.3.min, statistic= mod.boot, R=1000)
LER.mod.4.min.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.4.min, statistic= mod.boot, R=1000)
LER.mod.5.min.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.5.min, statistic= mod.boot, R=1000)
LER.mod.6.min.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.6.min, statistic= mod.boot, R=1000)
LER.mod.1.max.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.1.max, statistic= mod.boot, R=1000)
LER.mod.2.max.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.2.max, statistic= mod.boot, R=1000)
LER.mod.3.max.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.3.max, statistic= mod.boot, R=1000)
LER.mod.4.max.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.4.max, statistic= mod.boot, R=1000)
LER.mod.5.max.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.5.max, statistic= mod.boot, R=1000)
LER.mod.6.max.boot.out <- boot(data=battles_merged_singleentry, model = LER.mod.6.max, statistic= mod.boot, R=1000)


### Table output (main paper)
# MIMIC Models
htmlreg(file = file.path(DIR_OUTPUT, "ACH-Table-1.html"),
        list(extract.mplus.model.boot(model = mplusnoboot, model.boot = mplusboot, competence = TRUE),
             extract.mplus.model.boot(model = mplusnoboot, model.boot = mplusboot, competence = FALSE)), 
        digits = 3, stars = c(0.1, 0.05, 0.01), 
        reorder.coef = c(5,7,11,6,8:10,12,17:22,1,4,2,3,13,15,14,16), 
        custom.model.names = c("Competence", "Confederate Loyalty"), 
        custom.note = "Note: Standardized factor loadings presented, with each latent factor standardized to have mean zero and variance one. Bootstrapped standard errors clustered on commanders in parentheses. Two-tailed tests: ∗∗∗ p < 0.01, ∗∗ p < 0.05, ∗ p < 0.1.", 
        caption = "Table 1: MIMIC Model of Loyalty and Competence", caption.above = TRUE)


## Regression Models
# Coefficient names
BS.coef.names <- c("Cutpoint 2", "Cutpoint 1",
                   "Confederate Commander Competence", "Union Commander Competence",
                   "Confederate Commander Loyalty", "Union Commander Loyalty",
                   "Confederate Strength", "Union Strength",
                   "Confederate Battle",
                   "Confederate Commander Loyalty x Confederate Battle",
                   "Union Commander Loyalty x Confederate Battle",
                   "Union Attacker")

LER.coef.names <- c("Constant",
                    "Confederate Commander Competence", "Union Commander Competence",
                    "Confederate Commander Loyalty", "Union Commander Loyalty",
                    "Confederate Strength", "Union Strength",
                    "Confederate Battle",
                    "Confederate Commander Loyalty x Confederate Battle",
                    "Union Commander Loyalty x Confederate Battle",
                    "Union Attacker")

# Table 2
htmlreg(file = file.path(DIR_OUTPUT, "ACH-Table-2.html"),
        list(extract.lrm(BS.mod.1),
             extract.lrm(BS.mod.2),
             extract.lrm(BS.mod.3),
             extract.lrm(BS.mod.4),
             extract.lrm(BS.mod.5),
             extract.lrm(BS.mod.6)),
        omit.coef = "(S1|S2|S3|S4|S5|S6|S7|S8|S9|S0)",
        stars = c(0.01, 0.05, 0.1), 
        digits = 3, include.thresholds = TRUE,
        custom.coef.names = BS.coef.names,
        reorder.coef = c(3:6,7:12,2,1),
        custom.note = "Note: Ordered logit coefficients with battle-level observations. Dependent variable an ordered factor with three levels, in increasing order: Confederate Vic- tory, Inconclusive, and Union Victory. Model 6 includes commander-level fixed effects for those who fought in at least seven battles. Bootstrapped standard errors in parentheses. Two-tailed tests: ∗∗∗ p < 0.01, ∗∗ p < 0.05, ∗ p < 0.1.",
        caption = "Table 2: Latent Traits and Battle Outcomes",
        caption.above = TRUE,
        custom.model.names = c("Model 1", "Model 2", "Model 3",
                               "Model 4", "Model 5", "Model 6"),
        booktabs = TRUE, dcolumn  = TRUE, table = FALSE,  use.packages = FALSE)

# Table 3
htmlreg(file = file.path(DIR_OUTPUT, "ACH-Table-3.html"),
        list(extract.glm.boot(LER.mod.1, boot = LER.mod.1.boot.out, nullmod = LER.mod.0),
             extract.glm.boot(LER.mod.2, boot = LER.mod.2.boot.out, nullmod = LER.mod.0),
             extract.glm.boot(LER.mod.3, boot = LER.mod.3.boot.out, nullmod = LER.mod.0),
             extract.glm.boot(LER.mod.4, boot = LER.mod.4.boot.out, nullmod = LER.mod.0),
             extract.glm.boot(LER.mod.5, boot = LER.mod.5.boot.out, nullmod = LER.mod.0),
             extract.glm.boot(LER.mod.6, boot = LER.mod.6.boot.out, nullmod = LER.mod.0)),
        omit.coef = "(genFE)",
        stars = c(0.01, 0.05, 0.1), 
        digits = 3, include.thresholds = TRUE,
        custom.coef.names = LER.coef.names,
        custom.note = "Note: Quasi-binomial logistic coefficients, with battle-level observations. Dependent variable is the proportion of battle casualties from the Confederacy. Model 12 includes commander-level fixed effects for those who fought in at least seven battles. Bootstrapped standard errors in parentheses. Two-tailed tests: ∗∗∗ p < 0.01, ∗∗ p < 0.05, ∗ p < 0.1.",
        caption.above = TRUE,
        caption = "Table 3: Latent Traits and Relative Confederate Casualty Rates",
        custom.model.names = c("Model 7", "Model 8", "Model 9",
                               "Model 10", "Model 11", "Model 12"),
        booktabs = TRUE, dcolumn  = TRUE, table = FALSE,  use.packages = FALSE,
        reorder.coef = c(2:11, 1))



### Source File for Figure 1 (long)
suppressWarnings(source(file.path(DIR_CODE, "ACH-MovingAverages.R")))


### Figure 2 --- Correlations between loyalty and competence
set.seed(10)
f <- function(data, i){
  cor(data[i, 1], data[i, 2], method = "spearman", use = "complete.obs")
}

# Pull out the initial dates for all commanders
first_appts_all <- plyr::ddply(all_gens,
                               plyr::.(comm_id), summarise,
                               firstloyal = newloyal[which.min(edate2)],
                               firstcomp = comp[which.min(edate2)], confed = median(confed))

first_appts_low <- plyr::ddply(subset(all_gens, topoverall %in% c(8,9)),
                               plyr::.(comm_id), summarise,
                               firstloyal = newloyal[which.min(edate2)],
                               firstcomp = comp[which.min(edate2)], confed = median(confed))

first_appts_high <- plyr::ddply(subset(all_gens, topoverall > 9),
                                plyr::.(comm_id), summarise,
                                firstloyal = newloyal[which.min(edate2)],
                                firstcomp = comp[which.min(edate2)], confed = median(confed))

# Estimate the "initial" correlations
cor.test(subset(first_appts_low[,-1], confed == 0)[,1],
         subset(first_appts_low[,-1], confed == 0)[,2],
         method = "spearman")
cor.test(subset(first_appts_low[,-1], confed == 1)[,1],
         subset(first_appts_low[,-1], confed == 1)[,2],
         method = "spearman")

cor.test(subset(first_appts_high[,-1], confed == 0)[,1],
         subset(first_appts_high[,-1], confed == 0)[,2],
         method = "spearman")
cor.test(subset(first_appts_high[,-1], confed == 1)[,1],
         subset(first_appts_high[,-1], confed == 1)[,2],
         method = "spearman")

high.union.cor.initial <- with(subset(first_appts_high, confed == 0),
                               boot::boot(cbind(firstloyal, firstcomp),
                                          statistic = f, R = 1000)$t)
high.confed.cor.initial <- with(subset(first_appts_high, confed == 1),
                                boot::boot(cbind(firstloyal, firstcomp),
                                           statistic = f, R = 1000)$t)
low.union.cor.initial <- with(subset(first_appts_low, confed == 0),
                              boot::boot(cbind(firstloyal, firstcomp),
                                         statistic = f, R = 1000)$t)
low.confed.cor.initial <- with(subset(first_appts_low, confed == 1),
                               boot::boot(cbind(firstloyal, firstcomp),
                                          statistic = f, R = 1000)$t)

# Dynamic correlations based on moving average estimates
high.confed.cor.dynamic <- with(subset(movavg_frame_high, Side == "Confederacy"),
                                boot::boot(cbind(Competence, Loyalty),
                                           statistic = f, R = 1000)$t)
low.confed.cor.dynamic <- with(subset(movavg_frame_low, Side == "Confederacy"),
                               boot::boot(cbind(Competence, Loyalty),
                                          statistic = f, R = 1000)$t)
high.union.cor.dynamic <- with(subset(movavg_frame_high, Side == "Union"),
                               boot::boot(cbind(Competence, Loyalty),
                                          statistic = f, R = 1000)$t)
low.union.cor.dynamic <- with(subset(movavg_frame_low, Side == "Union"),
                              boot::boot(cbind(Competence, Loyalty),
                                         statistic = f, R = 1000)$t)

# Put it all into a data frame
cor.frame.all <- data.frame(rbind(c(as.numeric(quantile(high.union.cor.initial,
                                                        c(0.025, 0.05, 0.5, 0.95, 0.975))),
                                    "Initial", "Union",
                                    "High-Ranking\nCommanders"),
                                  c(as.numeric(quantile(low.union.cor.initial,
                                                        c(0.025, 0.05, 0.5, 0.95, 0.975))),
                                    "Initial", "Union",
                                    "Brigadier Generals, Colonels,\nand Commodores"),
                                  c(as.numeric(quantile(high.confed.cor.initial,
                                                        c(0.025, 0.05, 0.5, 0.95, 0.975))),
                                    "Initial", "Confederacy",
                                    "High-Ranking\nCommanders"),
                                  c(as.numeric(quantile(low.confed.cor.initial,
                                                        c(0.025, 0.05, 0.5, 0.95, 0.975))),
                                    "Initial", "Confederacy",
                                    "Brigadier Generals, Colonels,\nand Commodores"),
                                  c(as.numeric(quantile(high.union.cor.dynamic,
                                                        c(0.025, 0.05, 0.5, 0.95, 0.975))),
                                    "Dynamic", "Union",
                                    "High-Ranking\nCommanders"),
                                  c(as.numeric(quantile(low.union.cor.dynamic,
                                                        c(0.025, 0.05, 0.5, 0.95, 0.975))),
                                    "Dynamic", "Union",
                                    "Brigadier Generals, Colonels,\nand Commodores"),
                                  c(as.numeric(quantile(high.confed.cor.dynamic,
                                                        c(0.025, 0.05, 0.5, 0.95, 0.975))),
                                    "Dynamic", "Confederacy",
                                    "High-Ranking\nCommanders"),
                                  c(as.numeric(quantile(low.confed.cor.dynamic,
                                                        c(0.025, 0.05, 0.5, 0.95, 0.975))),
                                    "Dynamic", "Confederacy",
                                    "Brigadier Generals, Colonels,\nand Commodores")))


colnames(cor.frame.all) <- c("low.95", "low.90", "pred",
                             "high.90", "high.95",
                             "Type", "Side", "Ranking")
cor.frame.all$low.90 <- as.numeric(as.character(cor.frame.all$low.90))
cor.frame.all$low.95 <- as.numeric(as.character(cor.frame.all$low.95))
cor.frame.all$pred <- as.numeric(as.character(cor.frame.all$pred))
cor.frame.all$high.90 <- as.numeric(as.character(cor.frame.all$high.90))
cor.frame.all$high.95 <- as.numeric(as.character(cor.frame.all$high.95))

# Figure 2
cor.plot.all <- ggplot(cor.frame.all,
                       aes(x = Ranking, y = pred,
                           shape = Type, color = Type)) +
  geom_point(position=position_dodge(width=0.5)) +
  geom_errorbar(aes(ymin = low.90, ymax = high.90),
                position=position_dodge(width=0.5),
                width = 0, size = 1) +
  geom_errorbar(aes(ymin = low.95, ymax = high.95),
                position=position_dodge(width=0.5),
                width = 0) +
  facet_wrap(~ Side) +
  coord_flip() +
  theme_bw() +
  geom_hline(yintercept = 0, linetype = "dotted") +
  xlab('Officer Ranking\n') +
  ylab('\nCorrelation Between Loyalty and Competence') +
  scale_color_grey(name = "Correlation Type",
                   start = 0, end = 0.6) +
  scale_shape_discrete(name = "Correlation Type") +
  guides(colour = guide_legend(reverse=TRUE),
         shape = guide_legend(reverse = TRUE))

ggsave(cor.plot.all, file = file.path(DIR_FIGURES,"ACH-Figure-2.eps"), 
       device = cairo_ps,
       width = 6.5, height = 3*6.5/8)



### Figure 3 --- Battle outcomes
# Function to recover marginal effects for traits
BS_frame_gen_trait <- function(trait, side){
  set.seed(10)
  BS.newcoefs <- rmvnorm(10000, mean = coef(BS.mod.2),
                         sigma = vcov(BS.mod.2))
  BS.frame <- data.frame(matrix(NA, ncol = length(colnames(BS.newcoefs)), nrow = 2))
  trait <- grep(trait, c("loyal", "comp"), ignore.case=TRUE, value=TRUE)
  side <- grep(side, c("confed", "union"), ignore.case = TRUE, value = TRUE)
  trait.foo <- seq(mean(BS.mod.2$x[,paste(side, trait, sep="_")]) -
                     sd(BS.mod.2$x[,paste(side, trait, sep="_")]),
                   mean(BS.mod.2$x[,paste(side, trait, sep="_")]) +
                     sd(BS.mod.2$x[,paste(side, trait, sep="_")]),
                   length.out = 2)
  pred.frame <- data.frame(matrix(NA, ncol = length(BS.mod.2$coef) - 2,
                                  nrow = length(trait.foo)))
  colnames(pred.frame) <- names(BS.mod.2$coef)[-c(1,2)]
  pred.frame['confed_comp'] <- mean(BS.mod.2$x[,'confed_comp'])
  pred.frame['confed_loyal'] <- mean(BS.mod.2$x[,'confed_loyal'])
  pred.frame['union_comp'] <- mean(BS.mod.2$x[,'union_comp'])
  pred.frame['union_loyal'] <- mean(BS.mod.2$x[,'union_loyal'])
  pred.frame['CS_strength_1000'] <- mean(BS.mod.2$x[,'CS_strength_1000'])
  pred.frame['US_strength_1000'] <- mean(BS.mod.2$x[,'US_strength_1000'])
  pred.frame[paste(side, trait, sep="_")] <- trait.foo
  pred.xb <- t(as.matrix(pred.frame)%*%t(BS.newcoefs[,-grep("Indecisive|Union", colnames(BS.newcoefs))]))
  pred.xb1 <- BS.newcoefs[,1] + pred.xb
  pred.xb2 <- BS.newcoefs[,2] + pred.xb
  pred.p1 <- plogis(pred.xb1)
  pred.p2 <- plogis(pred.xb2) - plogis(pred.xb1)
  pred.p3 <- 1 - plogis(pred.xb2)
  pred.probs <- data.frame(cbind(pred.p1[,2] - pred.p1[,1],
                                 pred.p2[,2] - pred.p2[,1],
                                 pred.p3[,2] - pred.p3[,1]))
  pred.probs <- t(apply(pred.probs,2,quantile, c(0.025, 0.05,0.5,0.95, 0.975)))
  pred.prob.frame <- data.frame(pred.probs,
                                trait,
                                side,
                                Outcome = c("Union", "Inconclusive", "Confederacy"),
                                row.names = NULL)
  return(pred.prob.frame)
}

# Function to recover marginal effects for troop strength
BS_frame_gen_str2 <- function(side){
  set.seed(10)
  BS.newcoefs <- rmvnorm(10000, mean = coef(BS.mod.2),
                         sigma = vcov(BS.mod.2))
  BS.frame <- data.frame(matrix(NA, ncol = length(colnames(BS.newcoefs)), nrow = 2))
  side <- grep(side, c("CS", "US"), ignore.case = TRUE, value = TRUE)
  str.foo <- seq(mean(BS.mod.2$x[,paste(side, "strength_1000", sep="_")]) -
                   (1/2)*sd(BS.mod.2$x[,paste(side, "strength_1000", sep="_")]),
                 mean(BS.mod.2$x[,paste(side, "strength_1000", sep="_")]) +
                   (1/2)*sd(BS.mod.2$x[,paste(side, "strength_1000", sep="_")]),
                 length.out = 2)
  pred.frame <- data.frame(matrix(NA, ncol = length(BS.mod.2$coef) - 2,
                                  nrow = length(str.foo)))
  colnames(pred.frame) <- names(BS.mod.2$coef)[-c(1,2)]
  pred.frame['confed_comp'] <- mean(BS.mod.2$x[,'confed_comp'])
  pred.frame['confed_loyal'] <- mean(BS.mod.2$x[,'confed_loyal'])
  pred.frame['union_comp'] <- mean(BS.mod.2$x[,'union_comp'])
  pred.frame['union_loyal'] <- mean(BS.mod.2$x[,'union_loyal'])
  pred.frame['CS_strength_1000'] <- mean(BS.mod.2$x[,'CS_strength_1000'])
  pred.frame['US_strength_1000'] <- mean(BS.mod.2$x[,'US_strength_1000'])
  pred.frame[paste(side, "strength_1000", sep="_")] <- str.foo
  pred.xb <- t(as.matrix(pred.frame)%*%t(BS.newcoefs[,-grep("Indecisive|Union", colnames(BS.newcoefs))]))
  pred.xb1 <- BS.newcoefs[,1] + pred.xb
  pred.xb2 <- BS.newcoefs[,2] + pred.xb
  pred.p1 <- plogis(pred.xb1)
  pred.p2 <- plogis(pred.xb2) - plogis(pred.xb1)
  pred.p3 <- 1 - plogis(pred.xb2)
  pred.probs <- data.frame(cbind(pred.p1[,2] - pred.p1[,1],
                                 pred.p2[,2] - pred.p2[,1],
                                 pred.p3[,2] - pred.p3[,1]))
  pred.probs <- t(apply(pred.probs,2,quantile, c(0.025, 0.05,0.5,0.95, 0.975)))
  pred.prob.frame <- data.frame(pred.probs,
                                trait = "strength",
                                side,
                                Outcome = c("Union", "Inconclusive", "Confederacy"),
                                row.names = NULL)
  return(pred.prob.frame)
}

# Estimate the marginal effects
union.comp.BS.mfx <- BS_frame_gen_trait("comp", "union")
confed.comp.BS.mfx <- BS_frame_gen_trait("comp", "confed")
union.loyal.BS.mfx <- BS_frame_gen_trait("loyal", "union")
confed.loyal.BS.mfx <- BS_frame_gen_trait("loyal", "confed")
union.str.BS.mfx <- BS_frame_gen_str2("US")
confed.str.BS.mfx <- BS_frame_gen_str2("CS")

# Combine the MFX into a data frame
BS.mfx.frame <- rbind(union.comp.BS.mfx,
                      confed.comp.BS.mfx,
                      union.loyal.BS.mfx,
                      confed.loyal.BS.mfx,
                      union.str.BS.mfx,
                      confed.str.BS.mfx)

colnames(BS.mfx.frame) <- c("low.95", "low.90", "pred",
                            "high.90", "high.95",
                            "Trait", "Combatant", "Outcome")

BS.mfx.frame$Trait <- factor(BS.mfx.frame$Trait,
                             labels = c("Competence", "Loyalty", "Troop Strength"))
BS.mfx.frame$Trait <- relevel(BS.mfx.frame$Trait, ref = "Troop Strength")
BS.mfx.frame$Combatant <- recode(BS.mfx.frame$Combatant, US = "union", CS = "confed")
BS.mfx.frame$Combatant <- factor(BS.mfx.frame$Combatant, labels = c("Union", "Confederacy"))
BS.mfx.frame$Outcome <- factor(BS.mfx.frame$Outcome,
                               labels = c("Confederate\nVictory", 
                                          "Inconclusive", 
                                          "Union \nVictory"))

# Figure 3 --- Marginal effects on battle outcomes
BS.mfx.plot <- ggplot(BS.mfx.frame, aes(x = Trait, y = pred, shape = Combatant, color = Combatant)) +
  geom_point(position=position_dodge(width=0.5)) +
  geom_errorbar(aes(ymin = low.90, ymax = high.90),
                position=position_dodge(width=0.5),
                width = 0, size = 1) +
  geom_errorbar(aes(ymin = low.95, ymax = high.95),
                position=position_dodge(width=0.5),
                width = 0) +
  coord_flip() +
  theme_bw() +
  facet_grid(~Outcome) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  xlab('Trait\n') +
  ylab('\nPredicted Change in Probability of Outcome') +
  scale_color_grey(name = "Combatant",
                   start = 0, end = 0.6) +
  scale_shape_discrete(name = "Combatant") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(colour = guide_legend(reverse=TRUE),
         shape = guide_legend(reverse = TRUE))

ggsave(BS.mfx.plot, device = cairo_ps, 
       file = file.path(DIR_FIGURES, "ACH-Figure-3.eps"), 
       width = 6.5, height = 3.65625)


### Figure 4
# Function to generate marginal effects of traits
LER_frame_gen <- function(trait, side){
  set.seed(10)
  LER.newcoefs <- rmvnorm(10000, mean = coef(LER.mod.3),
                          sigma = bootcovgen(LER.mod.3.boot.out))
  colnames(LER.newcoefs) <- rownames(data.frame(LER.mod.3$coef))
  LER.frame <- data.frame(matrix(NA, ncol = length(LER.mod.3$coef), nrow = 2))
  trait <- grep(trait, c("loyal", "comp"), ignore.case=TRUE, value=TRUE)
  side <- grep(side, c("confed", "union"), ignore.case = TRUE, value = TRUE)
  trait.foo <- seq(mean(LER.mod.3$model[,paste(side, trait, sep="_")]) -
                     sd(LER.mod.3$model[,paste(side, trait, sep="_")]),
                   mean(LER.mod.3$model[,paste(side, trait, sep="_")]) +
                     sd(LER.mod.3$model[,paste(side, trait, sep="_")]),
                   length.out = 2)
  pred.frame <- data.frame(matrix(NA, ncol = length(LER.mod.3$coef), nrow = length(trait.foo)))
  colnames(pred.frame) <- names(LER.mod.3$coef)
  names(pred.frame)[grep("TRUE",names(pred.frame))] <- "confed_battle"
  pred.frame['(Intercept)'] <- 1
  pred.frame['confed_comp'] <- mean(LER.mod.3$model[,'confed_comp'])
  pred.frame['confed_loyal'] <- mean(LER.mod.3$model[,'confed_loyal'])
  pred.frame['union_comp'] <- mean(LER.mod.3$model[,'union_comp'])
  pred.frame['union_loyal'] <- mean(LER.mod.3$model[,'union_loyal'])
  pred.frame['CS_strength_1000'] <- mean(LER.mod.3$model[,'CS_strength_1000'])
  pred.frame['US_strength_1000'] <- mean(LER.mod.3$model[,'US_strength_1000'])
  pred.frame['confed_battle'] <- median(LER.mod.3$model[,'confed_battle'])
  pred.frame[paste(side, trait, sep="_")] <- trait.foo
  pred.prob <- plogis(t(as.matrix(pred.frame)%*%t(LER.newcoefs)))
  pred.probs <- quantile(apply(pred.prob,1,diff), c(0.025, 0.05, 0.5, 0.95, 0.975))
  pred.prob.frame <- data.frame(t(pred.probs),
                                trait,
                                side,
                                row.names = NULL)
  return(pred.prob.frame)
}

# Estimate the marginal effects and coerce them into a data frame
union.comp.LER.mfx <- LER_frame_gen("comp", "union")
confed.comp.LER.mfx <- LER_frame_gen("comp", "confed")
union.loyal.LER.mfx <- LER_frame_gen("loyal", "union")
confed.loyal.LER.mfx <- LER_frame_gen("loyal", "confed")

LER.mfx.frame <- rbind(union.comp.LER.mfx,
                       confed.comp.LER.mfx,
                       union.loyal.LER.mfx,
                       confed.loyal.LER.mfx)

colnames(LER.mfx.frame) <- c("low.95", "low.90", "pred",
                             "high.90", "high.95",
                             "Trait", "Combatant")
LER.mfx.frame$Trait <- factor(LER.mfx.frame$Trait, labels = c("Competence", "Loyalty"))
LER.mfx.frame$Combatant <- factor(LER.mfx.frame$Combatant, labels = c("Union", "Confederacy"))


# Figure 4
LER.mfx.plot <- ggplot(LER.mfx.frame, aes(x = Trait, y = pred, shape = Combatant, color = Combatant)) +
  geom_point(position=position_dodge(width=0.5)) +
  geom_errorbar(aes(ymin = low.90, ymax = high.90),
                position=position_dodge(width=0.5),
                width = 0, size = 1) +
  geom_errorbar(aes(ymin = low.95, ymax = high.95),
                position=position_dodge(width=0.5),
                width = 0) +
  theme_bw() +
  coord_flip() +
  geom_hline(yintercept = 0, linetype = "dotted") +
  xlab('Trait\n') +
  ylab('\nPredicted Change in Proportion of Casualties from Confederate Side') +
  scale_color_grey(name = "Combatant",
                   start = 0, end = 0.6) +
  scale_shape_discrete(name = "Combatant") +
  guides(colour = guide_legend(reverse=TRUE),
         shape = guide_legend(reverse = TRUE))

ggsave(LER.mfx.plot, file = file.path(DIR_FIGURES, "ACH-Figure-4.eps"), 
       device = cairo_ps, width = 6.5, height = 2.4375)



#### Grant-Lee Simulations (Figures 5/6/7)
# Get battle names
battlenames <- subset(read.csv("data/ACH-battles_summary.csv"), select = c(cwsac_id,	battle_name))
colnames(battlenames)[1] <- "battle"
battlenames$battle_name <- as.character(battlenames$battle_name)
battlenames[grep("Jackson", battlenames$battle_name),][2:3,2] <- c("Jackson, Mississippi", "Jackson, Tennessee")
battles_merged_singleentry_old <- battles_merged_singleentry

battles_merged_singleentry <- merge(battles_merged_singleentry_old, battlenames)

# Recode some data for Zelig
outcome.vars <- colnames(model.matrix(BS.mod.1))
outcome.vars <- outcome.vars[-grep("Intercept|:", outcome.vars)]


# Collect lifetime means for traits
lifetime_means_loyal <- aggregate(all_gens$newloyal, list(all_gens$comm_id), mean)
lifetime_means_loyal$confed <- 0
lifetime_means_loyal$confed[grep("CS", lifetime_means_loyal$Group.1)] <- 1
lifetime_means_comp <- aggregate(all_gens$comp, list(all_gens$comm_id), mean)
lifetime_means_comp$confed <- 0
lifetime_means_comp$confed[grep("CS", lifetime_means_comp$Group.1)] <- 1

# Initial Lee simulations
set.seed(10)
outcome.frame <- na.omit(data.frame(outcome = battles_merged_singleentry$outcome,
                                    battles_merged_singleentry[,colnames(battles_merged_singleentry) %in%
                                                                 outcome.vars]))
lee.zelig <- zologit$new()
lee.zelig$zelig(outcome ~ confed_comp + union_comp + confed_loyal + union_loyal,
                data = outcome.frame, model = "ologit", bootstrap = 1000, set.seed(10))


# Zelig has problems with simulating from a bootstrap, so we grab the
# hessian from the bootstrapped model and place it into the normal object
lee.hessian <- lee.zelig$zelig.out$z.out[[1]]$Hessian

lee.zelig <- zologit$new()
lee.zelig$zelig(outcome ~ confed_comp + union_comp + confed_loyal + union_loyal,
                data = outcome.frame, model = "ologit")

lee.zelig$zelig.out$z.out[[1]]$Hessian <- lee.hessian

lee.battles <- subset(battles[order(battles$start_date),], comm_id == "CS122")$battle
lee.quantiles <- NULL

for(i in 1:length(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                             lee.battles,]$battle)){
  lee.zelig$setx(confed_comp = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                       lee.battles,][i,]$confed_comp),
                 union_comp = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                      lee.battles,][i,]$union_comp),
                 confed_loyal = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                        lee.battles,][i,]$confed_loyal),
                 union_loyal = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                       lee.battles,][i,]$union_loyal))

  lee.zelig$setx1(confed_comp = mean(lifetime_means_comp[lifetime_means_comp$confed == 1,2]),
                  union_comp = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                       lee.battles,][i,]$union_comp),
                  confed_loyal = mean(lifetime_means_loyal[lifetime_means_loyal$confed == 1,2]),
                  union_loyal = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                        lee.battles,][i,]$union_loyal))

  lee.zelig$sim(10000)
  lee.foo <- list(t(apply(lee.zelig$get_qi("ev", "x1") - lee.zelig$get_qi("ev", "x"),
                          2,quantile,c(0.025,0.05,0.5,0.95,0.975))))
  names(lee.foo) <- as.character(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                              lee.battles,][i,]$battle_name)
  lee.quantiles <- c(lee.quantiles, lee.foo)
}

lee.quantiles <- lee.quantiles[order(as.Date(battles_merged_singleentry$start_date[as.character(battles_merged_singleentry$battle_name)  %in% names(lee.quantiles)]))]

# Grant simulations
set.seed(10)
outcome.frame <- na.omit(data.frame(outcome = battles_merged_singleentry$outcome,
                                    battles_merged_singleentry[,colnames(battles_merged_singleentry) %in%
                                                                 outcome.vars]))
grant.zelig <- zologit$new()
grant.zelig$zelig(outcome ~ confed_comp + union_comp + confed_loyal + union_loyal,
                  data = outcome.frame, model = "ologit", bootstrap = 1000, set.seed(10))

# Same issue with simulating from the bootstrapped covariance matrix
grant.hessian <- grant.zelig$zelig.out$z.out[[1]]$Hessian

grant.zelig <- zologit$new()
grant.zelig$zelig(outcome ~ confed_comp + union_comp + confed_loyal + union_loyal,
                  data = outcome.frame, model = "ologit")

grant.zelig$zelig.out$z.out[[1]]$Hessian <- grant.hessian


grant.battles <- subset(battles[order(battles$start_date),], comm_id == "US190")$battle

grant.quantiles <- NULL
for(j in 1:length(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                             grant.battles,]$battle)){
  grant.zelig$setx(confed_comp = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                         grant.battles,][j,]$confed_comp),
                   union_comp = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                        grant.battles,][j,]$union_comp),
                   confed_loyal = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                          grant.battles,][j,]$confed_loyal),
                   union_loyal = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                         grant.battles,][j,]$union_loyal))


  grant.zelig$setx1(confed_comp = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                          grant.battles,][j,]$confed_comp),
                    union_comp = mean(lifetime_means_comp[lifetime_means_comp$confed == 0,2]),
                    confed_loyal = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                           grant.battles,][j,]$confed_loyal),
                    union_loyal = mean(lifetime_means_loyal[lifetime_means_loyal$confed == 0,2]))

  grant.zelig$sim(10000)
  grant.foo <- list(t(apply(grant.zelig$get_qi("ev", "x1") - grant.zelig$get_qi("ev", "x"),
                            2,quantile,c(0.025,0.05,0.5,0.95,0.975))))
  names(grant.foo) <- as.character(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                grant.battles,][j,]$battle_name)
  grant.quantiles <- c(grant.quantiles, grant.foo)
}

grant.quantiles <- grant.quantiles[order(as.Date(battles_merged_singleentry$start_date[as.character(battles_merged_singleentry$battle_name)  %in% names(grant.quantiles)]))]

# Correcting some battle names (minor corrections for style, mostly)
names(lee.quantiles)[names(lee.quantiles) == "Gaines' Mill"] <- "Gaines's Mill"
names(lee.quantiles)[names(lee.quantiles) == "Manassas II"] <- "Second Bull Run"
names(lee.quantiles)[names(lee.quantiles) == "Fredericksburg I"] <- "Fredericksburg"
names(lee.quantiles)[names(lee.quantiles) == "Rappahannock Station II"] <- "Second Rappahannock Station"
names(lee.quantiles)[names(lee.quantiles) == "Petersburg II"] <- "Second Petersburg"
names(lee.quantiles)[names(lee.quantiles) == "Deep Bottom II"] <- "Second Deep Bottom"
names(lee.quantiles)[names(lee.quantiles) == "Chaffin's Farm / New Market Heights"] <- "Chaffin's Farm"
names(lee.quantiles)[names(lee.quantiles) == "Petersburg III"] <- "Third Petersburg"
names(grant.quantiles)[names(grant.quantiles) == "Chattanooga III"] <- "Missionary Ridge"
names(grant.quantiles)[names(grant.quantiles) == "Petersburg II"] <- "Second Petersburg"
names(grant.quantiles)[names(grant.quantiles) == "Petersburg III"] <- "Third Petersburg"

# Collect battle-level predicted changes in probability for Lee
lee.frame <- do.call(rbind.data.frame, lee.quantiles)
lee.frame$Outcome <- gsub("^.*\\.","",rownames(lee.frame))
lee.frame$Outcome <- factor(lee.frame$Outcome,
                            labels = c("Confederate Victory", "Inconclusive", "Union Victory"))
lee.frame$Battle <- gsub("\\..*","",rownames(lee.frame))


rownames(lee.frame) <- NULL
colnames(lee.frame)[1:5] <- c("low.95", "low.90", "pred", "high.90", "high.95")

lee.frame$Battle <- factor(lee.frame$Battle, levels = names(lee.quantiles))

levels(lee.frame$Outcome) <- c("Change in\nProbability of\nConfederate Victory",
                               "Change in\nProbability of\nInconclusive\nOutcome",
                               "Change in\nProbability of\nUnion Victory")

lee.frame$Battle <- factor(lee.frame$Battle, levels = rev(names(lee.quantiles)))

# And same for Grant
grant.frame <- do.call(rbind.data.frame, grant.quantiles)
grant.frame$Outcome <- gsub("^.*\\.","",rownames(grant.frame))
grant.frame$Outcome <- factor(grant.frame$Outcome,
                              labels = c("Confederate Victory",
                                         "Inconclusive",
                                         "Union Victory"))
grant.frame$Battle <- gsub("\\..*","",rownames(grant.frame))
grant.frame$Battle <- factor(grant.frame$Battle, levels = names(grant.quantiles))


rownames(grant.frame) <- NULL
colnames(grant.frame)[1:5] <- c("low.95", "low.90", "pred", "high.90", "high.95")

levels(grant.frame$Outcome) <- c("Change in\nProbability of\nConfederate Victory",
                                 "Change in\nProbability of\nInconclusive\nOutcome",
                                 "Change in\nProbability of\nUnion Victory")

grant.frame$Battle <- factor(grant.frame$Battle, levels = rev(names(grant.quantiles)))

# Make the plots
lee.plot <- ggplot(lee.frame, aes(x = Battle, y = pred)) +
  geom_point() +
  geom_errorbar(aes(ymin = low.95, ymax = high.95),
                width = 0) +
  geom_errorbar(aes(ymin = low.90, ymax = high.90),
                width = 0, size = 1) +
  facet_wrap(~Outcome, scales = "free_x") +
  scale_color_manual(values = c("Gray40", "Red", "Blue")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  coord_flip() +
  ylab('\nEffect of Replacing General Lee with an\n"Average" Confederate Commander') +
  xlab('Battle\n')

grant.plot <- ggplot(grant.frame, aes(x = Battle, y = pred)) +
  geom_point() +
  geom_errorbar(aes(ymin = low.95, ymax = high.95),
                width = 0,
                position=position_dodge(width=0.75)) +
  geom_errorbar(aes(ymin = low.90, ymax = high.90),
                width = 0, size = 1,
                position=position_dodge(width=0.75)) +
  facet_wrap(~Outcome) + coord_flip() +
  scale_color_manual(values = c("Gray40", "Red", "Blue")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  ylab('\nEffect of Replacing General Grant with an\n"Average" Union Commander') +
  xlab('Battle\n')

# Save the Lee/Grant plots as Figures 5 and 6
ggsave(file.path(DIR_FIGURES, "ACH-Figure-5.eps"), device = cairo_ps,
       lee.plot, height=6*7.5/6.5, width=6.5)

ggsave(file.path(DIR_FIGURES, "ACH-Figure-6.eps"), device = cairo_ps,
       grant.plot, height=4.5*7.5/6.5, width=6.5)


## Ternary plot (Figure 7)
# Lee at Fredericksburg
set.seed(10)
lee.fred <- grep("Fredericksburg",as.character(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                            lee.battles,]$battle_name))
lee.zelig$setx(confed_comp = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                     lee.battles,][lee.fred,]$confed_comp),
               union_comp = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                    lee.battles,][lee.fred,]$union_comp),
               confed_loyal = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                      lee.battles,][lee.fred,]$confed_loyal),
               union_loyal = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                     lee.battles,][lee.fred,]$union_loyal))


lee.zelig$setx1(confed_comp = mean(lifetime_means_comp[lifetime_means_comp$confed == 1,2]),
                union_comp = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                     lee.battles,][lee.fred,]$union_comp),
                confed_loyal = mean(lifetime_means_loyal[lifetime_means_loyal$confed == 1,2]),
                union_loyal = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                      lee.battles,][lee.fred,]$union_loyal))

lee.zelig$sim(10000)
lee.fred.counter <- data.frame(lee.zelig$get_qi("ev", "x1"))
lee.fred.cobs <- data.frame(lee.zelig$get_qi("ev", "x"))
lee.fred.both <- rbind(data.frame(lee.fred.cobs, Commander = "General Lee"),
                       data.frame(lee.fred.counter, Commander = '"Average" Confederate Commander'))
colnames(lee.fred.both) <- c("CSA", "Inconclusive", "USA", "Commander")


# Grant at Third Petersburg
set.seed(10)
grant.3p <- grep("Petersburg III", as.character(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                             grant.battles,]$battle_name))
grant.zelig$setx(confed_comp = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                       grant.battles,][grant.3p,]$confed_comp),
                 union_comp = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                      grant.battles,][grant.3p,]$union_comp),
                 confed_loyal = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                        grant.battles,][grant.3p,]$confed_loyal),
                 union_loyal = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                       grant.battles,][grant.3p,]$union_loyal))


grant.zelig$setx1(confed_comp = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                        grant.battles,][grant.3p,]$confed_comp),
                  union_comp = mean(lifetime_means_comp[lifetime_means_comp$confed == 0,2]),
                  confed_loyal = as.numeric(battles_merged_singleentry[battles_merged_singleentry$battle %in%
                                                                         grant.battles,][grant.3p,]$confed_loyal),
                  union_loyal = mean(lifetime_means_loyal[lifetime_means_loyal$confed == 0,2]))

grant.zelig$sim(10000)
grant.3p.counter <- data.frame(grant.zelig$get_qi("ev", "x1"))
grant.3p.cobs <- data.frame(grant.zelig$get_qi("ev", "x"))
grant.3p.both <- rbind(data.frame(grant.3p.cobs, Commander = "General Grant"),
                       data.frame(grant.3p.counter, Commander = '"Average" Union Commander'))
colnames(grant.3p.both) <- c("CSA", "Inconclusive", "USA", "Commander")


both.tern <- rbind(data.frame(lee.fred.both, Battle = "Lee at Fredericksburg"),
                   data.frame(grant.3p.both, Battle = "Grant at Third Petersburg"))

levels(both.tern$Commander) <- c("Observed", "Counterfactual", "Observed", "Counterfactual")

both.tern.plot <- ggtern(both.tern, aes(CSA, Inconclusive, USA)) +
  geom_point(alpha = 0.05) +
  facet_grid(Commander ~ Battle) +
  theme_bw() +
  geom_confidence_tern(colour = "grey50")

# Save the plot as Figure 7
ggsave(file.path(DIR_FIGURES, "ACH-Figure-7.eps"), device = cairo_ps,
       both.tern.plot, height=7.5*9/6.5, width=6.5)





### Appendix A: Battle-Level Summary Statistics
battles_merged_singleentry_temp <- battles_merged_singleentry
battles_merged_singleentry_temp$unionattacker <- as.numeric(battles_merged_singleentry_temp$attacker == "US")
battles_merged_singleentry_temp$unionvictory <-  as.numeric(battles_merged_singleentry_temp$outcome == "Union")
battles_merged_singleentry_temp$confedvictory <-  as.numeric(battles_merged_singleentry_temp$outcome == "Confederate")

# Table A-1
stargazer::stargazer(subset(na.omit(battles_merged_singleentry_temp), 
                            select = c(CS_casualties, US_casualties,
                                       confedvictory, unionvictory, 
                                       confed_comp, union_comp, confed_loyal, union_loyal,
                                       CS_strength, US_strength, confed_battle, unionattacker)),
                     covariate.labels = c("Confederate Casualties", "Union Casualties",
                                          "Confederate Victory", "Union Victory",
                                          "Confederate Competence", "Union Competence", 
                                          "Confederate Loyalty", "Union Loyalty",
                                          "Confederate Strength", "Union Strength", "Confederate Battle", 
                                          "Union Attacker"),
                     out = file.path(DIR_OUTPUT,"ACH-Table-A1.html"),
                     type = "html", style = "ajps", align = TRUE,
                     title = "Table A-1: Battle-Level Summary Statistics",
                     notes = "Note: Loyalty and competence measures are the means for all commanders taking part in the battle.")


### Appendix C: Alternative Specifications
## Tables C-1 and C-2: Alternaitve MIMIC Models
mplusbootmin <- readModels(target = file.path(DIR_CODE, "ACH-MIMIC-BootstrapMin.out"))
mplusnobootmin <- readModels(target = file.path(DIR_CODE, "ACH-MIMIC-NoBootstrapMin.out"))
mplusbootmax <- readModels(target = file.path(DIR_CODE, "ACH-MIMIC-BootstrapMax.out"))
mplusnobootmax <- readModels(target = file.path(DIR_CODE, "ACH-MIMIC-NoBootstrapMax.out"))

# Table C-1
htmlreg(file = file.path(DIR_OUTPUT, "ACH-Table-C1.html"),
        list(extract.mplus.model.boot(model = mplusnobootmin, model.boot = mplusbootmin, competence = TRUE),
             extract.mplus.model.boot(model = mplusnobootmin, model.boot = mplusbootmin, competence = FALSE)), 
        digits = 3, stars = c(0.1, 0.05, 0.01), 
        reorder.coef = c(5,7,11,6,8:10,12,17:22,1,4,2,3,13,15,14,16), 
        custom.model.names = c("Competence", "Confederate Loyalty"), 
        custom.note = "Note: Strength and casualty estimates used are the 5th percentile of imputed values, as opposed to the geometric mean used in the main text. Standardized factor loadings presented, with each latent factor standardized to have mean zero and variance one. Bootstrapped standard errors clustered on commanders in parentheses. Two-tailed tests: ∗∗∗ p < 0.01, ∗∗ p < 0.05, ∗ p < 0.1.", 
        caption = "Table C-1: A MIMIC Model of Loyalty and Competence (5th Percentile Values of Imputations Used for Strength and Casualty Estimates)",
        caption.above = TRUE)

# Table C-2
htmlreg(file = file.path(DIR_OUTPUT, "ACH-Table-C2.html"),
        list(extract.mplus.model.boot(model = mplusnobootmax, model.boot = mplusbootmax, competence = TRUE),
             extract.mplus.model.boot(model = mplusnobootmax, model.boot = mplusbootmax, competence = FALSE)), 
        digits = 3, stars = c(0.1, 0.05, 0.01), 
        reorder.coef = c(5,7,11,6,8:10,12,17:22,1,4,2,3,13,15,14,16), 
        custom.model.names = c("Competence", "Confederate Loyalty"), 
        custom.note = "Note: Strength and casualty estimates used are the 95th percentile of imputed values, as opposed to the geometric mean used in the main text. Standardized factor loadings presented, with each latent factor standardized to have mean zero and variance one. Bootstrapped standard errors clustered on commanders in parentheses. Two-tailed tests: ∗∗∗ p < 0.01, ∗∗ p < 0.05, ∗ p < 0.1.", 
        caption = "Table C-2: A MIMIC Model of Loyalty and Competence (95th Percentile Values of Imputations Used for Strength and Casualty Estimates)",
        caption.above = TRUE)

### Regression Models (Tables C-3 through C-12)
## Smaller battles
sink("NULL")
set.seed(10)
LER.mod.0.small <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                         confed_comp + union_comp + confed_loyal + union_loyal -
                         confed_comp - union_comp - confed_loyal - union_loyal,
                       data = subset(battles_merged_singleentry,
                                     CS_strength_1000 + US_strength_1000 <= 
                                       median(CS_strength_1000 + US_strength_1000)),
                       family = quasibinomial(link = "logit"))
LER.mod.1.small <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                         confed_comp + union_comp + confed_loyal + union_loyal,
                       data = subset(battles_merged_singleentry,
                                     CS_strength_1000 + US_strength_1000 <= 
                                       median(CS_strength_1000 + US_strength_1000)),
                       family = quasibinomial(link = "logit"))
LER.mod.2.small <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                         confed_comp + union_comp + confed_loyal + union_loyal +
                         CS_strength_1000 + US_strength_1000,
                       data = subset(battles_merged_singleentry,
                                     CS_strength_1000 + US_strength_1000 <= 
                                       median(CS_strength_1000 + US_strength_1000)),
                       family = quasibinomial(link = "logit"))
LER.mod.3.small <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                         confed_comp + union_comp + confed_loyal + union_loyal +
                         CS_strength_1000 + US_strength_1000 + confed_battle,
                       data = subset(battles_merged_singleentry,
                                     CS_strength_1000 + US_strength_1000 <= 
                                       median(CS_strength_1000 + US_strength_1000)),
                       family = quasibinomial(link = "logit"))
LER.mod.4.small <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                         confed_comp + union_comp + (confed_loyal + union_loyal)*confed_battle +
                         CS_strength_1000 + US_strength_1000,
                       data = subset(battles_merged_singleentry,
                                     CS_strength_1000 + US_strength_1000 <= 
                                       median(CS_strength_1000 + US_strength_1000)),
                       family = quasibinomial(link = "logit"))
LER.mod.5.small <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                         confed_comp + union_comp + (confed_loyal + union_loyal)*confed_battle +
                         CS_strength_1000 + US_strength_1000 + attacker,
                       data = subset(battles_merged_singleentry,
                                     CS_strength_1000 + US_strength_1000 <= 
                                       median(CS_strength_1000 + US_strength_1000)),
                       family = quasibinomial(link = "logit"))

BS.mod.1.small <- bootcov2(lrm(outcome ~ confed_comp + union_comp + confed_loyal + union_loyal,
                              data = subset(battles_merged_singleentry,
                                            CS_strength_1000 + US_strength_1000 <=
                                              median(CS_strength_1000 + US_strength_1000)),
                              x = TRUE, y = TRUE), B = 1000)
BS.mod.2.small <- bootcov2(lrm(outcome ~ confed_comp + union_comp + confed_loyal + union_loyal +
                                CS_strength_1000 + US_strength_1000,
                              data = subset(battles_merged_singleentry,
                                            CS_strength_1000 + US_strength_1000 <=
                                              median(CS_strength_1000 + US_strength_1000)),
                              x = TRUE, y = TRUE), B = 1000)
BS.mod.3.small <- bootcov2(lrm(outcome ~ confed_comp + union_comp + confed_loyal + union_loyal +
                                CS_strength_1000 + US_strength_1000 + confed_battle,
                              data = subset(battles_merged_singleentry,
                                            CS_strength_1000 + US_strength_1000 <=
                                              median(CS_strength_1000 + US_strength_1000)),
                              x = TRUE, y = TRUE), B = 1000)
BS.mod.4.small <- bootcov2(lrm(outcome ~ confed_comp + union_comp + (confed_loyal + union_loyal)*confed_battle +
                                CS_strength_1000 + US_strength_1000,
                              data = subset(battles_merged_singleentry,
                                            CS_strength_1000 + US_strength_1000 <=
                                              median(CS_strength_1000 + US_strength_1000)),
                              x = TRUE, y = TRUE), B = 1000)
BS.mod.5.small <- bootcov2(lrm(outcome ~ confed_comp + union_comp + (confed_loyal + union_loyal)*confed_battle +
                                CS_strength_1000 + US_strength_1000 + attacker,
                              data = subset(battles_merged_singleentry,
                                            CS_strength_1000 + US_strength_1000 <=
                                              median(CS_strength_1000 + US_strength_1000)),
                              x = TRUE, y = TRUE), B = 1000)

LER.mod.0.large <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                         confed_comp + union_comp + confed_loyal + union_loyal -
                         confed_comp - union_comp - confed_loyal - union_loyal,
                       data = subset(battles_merged_singleentry,
                                     CS_strength_1000 + US_strength_1000 >= 
                                       median(CS_strength_1000 + US_strength_1000)),
                       family = quasibinomial(link = "logit"))

## Larger models
LER.mod.1.large <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                         confed_comp + union_comp + confed_loyal + union_loyal,
                       data = subset(battles_merged_singleentry,
                                     CS_strength_1000 + US_strength_1000 >= 
                                       median(CS_strength_1000 + US_strength_1000)),
                       family = quasibinomial(link = "logit"))
LER.mod.2.large <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                         confed_comp + union_comp + confed_loyal + union_loyal +
                         CS_strength_1000 + US_strength_1000,
                       data = subset(battles_merged_singleentry,
                                     CS_strength_1000 + US_strength_1000 >= 
                                       median(CS_strength_1000 + US_strength_1000)),
                       family = quasibinomial(link = "logit"))
LER.mod.3.large <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                         confed_comp + union_comp + confed_loyal + union_loyal +
                         CS_strength_1000 + US_strength_1000 + confed_battle,
                       data = subset(battles_merged_singleentry,
                                     CS_strength_1000 + US_strength_1000 >= 
                                       median(CS_strength_1000 + US_strength_1000)),
                       family = quasibinomial(link = "logit"))
LER.mod.4.large <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                         confed_comp + union_comp + (confed_loyal + union_loyal)*confed_battle +
                         CS_strength_1000 + US_strength_1000,
                       data = subset(battles_merged_singleentry,
                                     CS_strength_1000 + US_strength_1000 >= 
                                       median(CS_strength_1000 + US_strength_1000)),
                       family = quasibinomial(link = "logit"))
LER.mod.5.large <- glm(cbind(round(CS_casualties), round(US_casualties)) ~ 
                         confed_comp + union_comp + (confed_loyal + union_loyal)*confed_battle +
                         CS_strength_1000 + US_strength_1000 + attacker,
                       data = subset(battles_merged_singleentry,
                                     CS_strength_1000 + US_strength_1000 >= 
                                       median(CS_strength_1000 + US_strength_1000)),
                       family = quasibinomial(link = "logit"))

BS.mod.1.large <- bootcov2(lrm(outcome ~ confed_comp + union_comp + confed_loyal + union_loyal,
                              data = subset(battles_merged_singleentry,
                                            CS_strength_1000 + US_strength_1000 >=
                                              median(CS_strength_1000 + US_strength_1000)),
                              x = TRUE, y = TRUE), B = 1000)
BS.mod.2.large <- bootcov2(lrm(outcome ~ confed_comp + union_comp + confed_loyal + union_loyal +
                                CS_strength_1000 + US_strength_1000,
                              data = subset(battles_merged_singleentry,
                                            CS_strength_1000 + US_strength_1000 >=
                                              median(CS_strength_1000 + US_strength_1000)),
                              x = TRUE, y = TRUE), B = 1000)
BS.mod.3.large <- bootcov2(lrm(outcome ~ confed_comp + union_comp + confed_loyal + union_loyal +
                                CS_strength_1000 + US_strength_1000 + confed_battle,
                              data = subset(battles_merged_singleentry,
                                            CS_strength_1000 + US_strength_1000 >=
                                              median(CS_strength_1000 + US_strength_1000)),
                              x = TRUE, y = TRUE), B = 1000)
BS.mod.4.large <- bootcov2(lrm(outcome ~ confed_comp + union_comp + (confed_loyal + union_loyal)*confed_battle +
                                CS_strength_1000 + US_strength_1000,
                              data = subset(battles_merged_singleentry,
                                            CS_strength_1000 + US_strength_1000 >=
                                              median(CS_strength_1000 + US_strength_1000)),
                              x = TRUE, y = TRUE), B = 1000)
BS.mod.5.large <- bootcov2(lrm(outcome ~ confed_comp + union_comp + (confed_loyal + union_loyal)*confed_battle +
                                CS_strength_1000 + US_strength_1000 + attacker,
                              data = subset(battles_merged_singleentry,
                                            CS_strength_1000 + US_strength_1000 >=
                                              median(CS_strength_1000 + US_strength_1000)),
                              x = TRUE, y = TRUE), B = 1000)
sink()

# Bootstrapping the quasibinomial models since we can't use bootcov() with rms
set.seed(10)
LER.mod.1.small.boot.out <- boot(data = subset(battles_merged_singleentry,
                                               CS_strength_1000 + US_strength_1000 <=
                                                 median(CS_strength_1000 + US_strength_1000)),
                                 model = LER.mod.1.small, statistic= mod.boot, R=1000)
LER.mod.2.small.boot.out <- boot(data = subset(battles_merged_singleentry,
                                               CS_strength_1000 + US_strength_1000 <=
                                                 median(CS_strength_1000 + US_strength_1000)),
                                 model = LER.mod.2.small, statistic= mod.boot, R=1000)
LER.mod.3.small.boot.out <- boot(data = subset(battles_merged_singleentry,
                                               CS_strength_1000 + US_strength_1000 <=
                                                 median(CS_strength_1000 + US_strength_1000)),
                                 model = LER.mod.3.small, statistic= mod.boot, R=1000)
LER.mod.4.small.boot.out <- boot(data = subset(battles_merged_singleentry,
                                               CS_strength_1000 + US_strength_1000 <=
                                                 median(CS_strength_1000 + US_strength_1000)),
                                 model = LER.mod.4.small, statistic= mod.boot, R=1000)
LER.mod.5.small.boot.out <- boot(data = subset(battles_merged_singleentry,
                                               CS_strength_1000 + US_strength_1000 <=
                                                 median(CS_strength_1000 + US_strength_1000)),
                                 model = LER.mod.5.small, statistic= mod.boot, R=1000)

LER.mod.1.large.boot.out <- boot(data = subset(battles_merged_singleentry,
                                               CS_strength_1000 + US_strength_1000 >=
                                                 median(CS_strength_1000 + US_strength_1000)),
                                 model = LER.mod.1.large, statistic= mod.boot, R=1000)
LER.mod.2.large.boot.out <- boot(data = subset(battles_merged_singleentry,
                                               CS_strength_1000 + US_strength_1000 >=
                                                 median(CS_strength_1000 + US_strength_1000)),
                                 model = LER.mod.2.large, statistic= mod.boot, R=1000)
LER.mod.3.large.boot.out <- boot(data = subset(battles_merged_singleentry,
                                               CS_strength_1000 + US_strength_1000 >=
                                                 median(CS_strength_1000 + US_strength_1000)),
                                 model = LER.mod.3.large, statistic= mod.boot, R=1000)
LER.mod.4.large.boot.out <- boot(data = subset(battles_merged_singleentry,
                                               CS_strength_1000 + US_strength_1000 >=
                                                 median(CS_strength_1000 + US_strength_1000)),
                                 model = LER.mod.4.large, statistic= mod.boot, R=1000)
LER.mod.5.large.boot.out <- boot(data = subset(battles_merged_singleentry,
                                               CS_strength_1000 + US_strength_1000 >=
                                                 median(CS_strength_1000 + US_strength_1000)),
                                 model = LER.mod.5.large, statistic= mod.boot, R=1000)

# Coefficient names for presentation of crossover models
BS.coef.names.crossover <- c("Cutpoint 2", "Cutpoint 1",
                             "Confederate Commander Competence", "Union Commander Competence",
                             "Confederate Commander Loyalty", "Union Commander Loyalty",
                             "Crossover Confederate Commander Loyalty", "Crossover Union Commander Loyalty",
                             "Confederate Strength", "Union Strength",
                             "Confederate Battle",
                             "Confederate Commander Loyalty x Confederate Battle",
                             "Union Commander Loyalty x Confederate Battle",
                             "Crossover Confederate Commander Loyalty x Confederate Battle",
                             "Crossover Union Commander Loyalty x Confederate Battle",
                             "Union Attacker")

LER.coef.names.crossover <- c("Constant",
                              "Confederate Commander Competence", "Union Commander Competence",
                              "Confederate Commander Loyalty", "Union Commander Loyalty",
                              "Crossover Confederate Commander Loyalty", "Crossover Union Commander Loyalty",
                              "Confederate Strength", "Union Strength",
                              "Confederate Battle",
                              "Confederate Commander Loyalty x Confederate Battle",
                              "Union Commander Loyalty x Confederate Battle",
                              "Crossover Confederate Commander Loyalty x Confederate Battle",
                              "Crossover Union Commander Loyalty x Confederate Battle",
                              "Union Attacker")

# Regression Tables (C-3 through C-12)
htmlreg(file = file.path(DIR_OUTPUT, "ACH-Table-C3.html"),
        list(extract.lrm(BS.mod.1.min),
             extract.lrm(BS.mod.2.min),
             extract.lrm(BS.mod.3.min),
             extract.lrm(BS.mod.4.min),
             extract.lrm(BS.mod.5.min),
             extract.lrm(BS.mod.6.min)),
        omit.coef = "(S1|S2|S3|S4|S5|S6|S7|S8|S9|S0)",
        stars = c(0.01, 0.05, 0.1),
        digits = 3, include.thresholds = TRUE,
        custom.coef.names = BS.coef.names,
        reorder.coef = c(3:6,7:12,2,1),
        caption = "Table C-3: Latent Traits and Battle Outcomes (Minimum Traits Used)",
        caption.above = TRUE,
        custom.note = "Note: Ordered logit coefficients presented. Observations are at the battle level. The dependent variable is an ordered factor with the levels being, in order, Confederate Victory, Inconclusive, and Union Victory. Negative [positive] coefficients indicate the covariate is associated with higher probabilities of Confederate [Union] victories. Standard errors in parentheses. Model C-6 includes commander-level fixed effects for those who fought in at least seven battles. Two-tailed tests: ∗∗∗ p < 0.01, ∗∗ p < 0.05, ∗ p < 0.1.",
        custom.model.names = c("Model C-1", "Model C-2", "Model C-3",
                               "Model C-4", "Model C-5", "Model C-6"),
        booktabs = TRUE, dcolumn  = TRUE, table = FALSE,  use.packages = FALSE)

htmlreg(file = file.path(DIR_OUTPUT, "ACH-Table-C4.html"),
        list(extract.glm.boot(LER.mod.1.min, boot = LER.mod.1.min.boot.out, nullmod = LER.mod.0),
             extract.glm.boot(LER.mod.2.min, boot = LER.mod.2.min.boot.out, nullmod = LER.mod.0),
             extract.glm.boot(LER.mod.3.min, boot = LER.mod.3.min.boot.out, nullmod = LER.mod.0),
             extract.glm.boot(LER.mod.4.min, boot = LER.mod.4.min.boot.out, nullmod = LER.mod.0),
             extract.glm.boot(LER.mod.5.min, boot = LER.mod.5.min.boot.out, nullmod = LER.mod.0),
             extract.glm.boot(LER.mod.6.min, boot = LER.mod.6.min.boot.out, nullmod = LER.mod.0)),
        omit.coef = "(genFE)",
        stars = c(0.01, 0.05, 0.1), 
        digits = 3, include.thresholds = TRUE,
        custom.coef.names = LER.coef.names,
        caption = "Table C-4: Latent Traits and Relative Confederate Casualty Rates (Minimum Traits Used)",
        caption.above = TRUE,
        custom.note = "Note: Quasi-binomial logistic coefficients presented. Observations are at the battle level. The dependent variable is the proportion of battle casualties from the Confederacy. Positive [negative] coefficients indicate the covariate is associated with higher [lower] ratios of Confederate casualties. Model C-12 includes commander-level fixed effects for those who fought in at least seven battles. Bootstrapped standard errors in parentheses. Two-tailed tests: ∗∗∗ p < 0.01, ∗∗ p < 0.05, ∗ p < 0.1.",
        custom.model.names = c("Model C-7", "Model C-8", "Model C-9",
                               "Model C-10", "Model C-11", "Model C-12"),
        booktabs = TRUE, dcolumn  = TRUE, table = FALSE,  use.packages = FALSE,
        reorder.coef = c(2:11, 1))

htmlreg(file = file.path(DIR_OUTPUT, "ACH-Table-C5.html"),
        list(extract.lrm(BS.mod.1.max),
             extract.lrm(BS.mod.2.max),
             extract.lrm(BS.mod.3.max),
             extract.lrm(BS.mod.4.max),
             extract.lrm(BS.mod.5.max),
             extract.lrm(BS.mod.6.max)),
        omit.coef = "(S1|S2|S3|S4|S5|S6|S7|S8|S9|S0)",
        stars = c(0.01, 0.05, 0.1), 
        digits = 3, include.thresholds = TRUE,
        custom.coef.names = BS.coef.names,
        reorder.coef = c(3:6,7:12,2,1),
        custom.note = "Note: Ordered logit coefficients presented. Observations are at the battle level. The dependent variable is an ordered factor with the levels being, in order, Confederate Victory, Inconclusive, and Union Victory. Negative [positive] coefficients indicate the covariate is associated with higher probabilities of Confederate [Union] victories. Bootstrapped standard errors in parentheses. Model C-18 includes commander-level fixed effects for those who fought in at least seven battles. Two-tailed tests: ∗∗∗ p < 0.01, ∗∗ p < 0.05, ∗ p < 0.1.",
        custom.model.names = c("Model C-13", "Model C-14", "Model C-15",
                               "Model C-16", "Model C-17", "Model C-18"),
        caption = "Table C-5: Latent Traits and Battle Outcomes (Maximum Traits Used)",
        caption.above = TRUE,
        booktabs = TRUE, dcolumn  = TRUE, table = FALSE,  use.packages = FALSE)

htmlreg(file = file.path(DIR_OUTPUT, "ACH-Table-C6.html"),
        list(extract.glm.boot(LER.mod.1.max, boot = LER.mod.1.max.boot.out, nullmod = LER.mod.0),
             extract.glm.boot(LER.mod.2.max, boot = LER.mod.2.max.boot.out, nullmod = LER.mod.0),
             extract.glm.boot(LER.mod.3.max, boot = LER.mod.3.max.boot.out, nullmod = LER.mod.0),
             extract.glm.boot(LER.mod.4.max, boot = LER.mod.4.max.boot.out, nullmod = LER.mod.0),
             extract.glm.boot(LER.mod.5.max, boot = LER.mod.5.max.boot.out, nullmod = LER.mod.0),
             extract.glm.boot(LER.mod.6.max, boot = LER.mod.6.max.boot.out, nullmod = LER.mod.0)),
        omit.coef = "(genFE)",
        stars = c(0.01, 0.05, 0.1), 
        digits = 3, include.thresholds = TRUE,
        custom.coef.names = LER.coef.names,
        custom.note = "Note: Quasi-binomial logistic coefficients presented. Observations are at the battle level. The dependent variable is the proportion of battle casualties from the Confederacy. Positive [negative] coefficients indicate the covariate is associated with higher [lower] ratios of Confederate casualties. Model C-24 includes commander-level fixed effects for those who fought in at least seven battles. Bootstrapped standard errors in parentheses. Two-tailed tests: ∗∗∗ p < 0.01, ∗∗ p < 0.05, ∗ p < 0.1.",
        caption = "Table C-6: Latent Traits and Relative Confederate Casualty Rates (Maximum Traits Used)",
        caption.above = TRUE,
        custom.model.names = c("Model C-19", "Model C-20", "Model C-21",
                               "Model C-22", "Model C-23", "Model C-24"),
        booktabs = TRUE, dcolumn  = TRUE, table = FALSE,  use.packages = FALSE,
        reorder.coef = c(2:11, 1))

htmlreg(file = file.path(DIR_OUTPUT, "ACH-Table-C7.html"),
        list(extract.lrm(BS.mod.1.small),
             extract.lrm(BS.mod.2.small),
             extract.lrm(BS.mod.3.small),
             extract.lrm(BS.mod.4.small),
             extract.lrm(BS.mod.5.small)),
        omit.coef = "(S1|S2|S3|S4|S5|S6|S7|S8|S9|S0)",
        stars = c(0.01, 0.05, 0.1),
        digits = 3, include.thresholds = TRUE,
        custom.coef.names = BS.coef.names,
        reorder.coef = c(3:6,7:12,2,1),
        custom.note = "Note: Ordered logit coefficients presented. Observations are at the battle level. The dependent variable is an ordered factor with the levels being, in order, Confederate Victory, Inconclusive, and Union Victory. Negative [positive] coefficients indicate the covariate is associated with higher probabilities of Confederate [Union] victories. Battles under analysis are those where the total forces on both sides are at or below the median in our dataset. Bootstrapped standard errors in parentheses. Two-tailed tests: ∗∗∗ p < 0.01, ∗∗ p < 0.05, ∗ p < 0.1.",
        custom.model.names = c("Model C-25", "Model C-26", "Model C-27",
                               "Model C-28", "Model C-29"),
        caption = "Table C-7: Latent Traits and Battle Outcomes (Smaller Battles)",
        caption.above = TRUE,
        booktabs = TRUE, dcolumn  = TRUE, table = FALSE,  use.packages = FALSE)

htmlreg(file = file.path(DIR_OUTPUT, "ACH-Table-C8.html"),
        list(extract.glm.boot(LER.mod.1.small, boot = LER.mod.1.small.boot.out, nullmod = LER.mod.0.small),
             extract.glm.boot(LER.mod.2.small, boot = LER.mod.2.small.boot.out, nullmod = LER.mod.0.small),
             extract.glm.boot(LER.mod.3.small, boot = LER.mod.3.small.boot.out, nullmod = LER.mod.0.small),
             extract.glm.boot(LER.mod.4.small, boot = LER.mod.4.small.boot.out, nullmod = LER.mod.0.small),
             extract.glm.boot(LER.mod.5.small, boot = LER.mod.5.small.boot.out, nullmod = LER.mod.0.small)),
        omit.coef = "(genFE)",
        stars = c(0.01, 0.05, 0.1), 
        digits = 3, include.thresholds = TRUE,
        custom.coef.names = LER.coef.names,
        custom.note = "Note: Quasi-binomial logistic coefficients presented. Observations at the battle level. The dependent variable is the proportion of battle casualties from the Confederacy. Positive [negative] coefficients indicate the covariate is associated with higher [lower] ratios of Confederate casualties. Battles under analysis are those where the total forces on both sides are at or below the median in our dataset. Bootstrapped standard errors in parentheses. Two-tailed tests: ∗∗∗ p < 0.01, ∗∗ p < 0.05, ∗ p < 0.1.",
        custom.model.names = c("Model C-30", "Model C-31", "Model C-32",
                               "Model C-33", "Model C-34"),
        caption = "Table C-8: Latent Traits and Relative Confederate Casualty Rates (Smaller Battles)",
        caption.above = TRUE,
        booktabs = TRUE, dcolumn  = TRUE, table = FALSE,  use.packages = FALSE,
        reorder.coef = c(2:11, 1))

htmlreg(file = file.path(DIR_OUTPUT, "ACH-Table-C9.html"),
        list(extract.lrm(BS.mod.1.large),
             extract.lrm(BS.mod.2.large),
             extract.lrm(BS.mod.3.large),
             extract.lrm(BS.mod.4.large),
             extract.lrm(BS.mod.5.large)),
        omit.coef = "(S1|S2|S3|S4|S5|S6|S7|S8|S9|S0)",
        stars = c(0.01, 0.05, 0.1), 
        digits = 3, include.thresholds = TRUE,
        custom.coef.names = BS.coef.names,
        reorder.coef = c(3:6,7:12,2,1),
        custom.note = "Note: Ordered logit coefficients presented. Observations are at the battle level. The dependent variable is an ordered factor with the levels being, in order, Confederate Victory, Inconclusive, and Union Victory. Negative [positive] coefficients indicate the covariate is associated with higher probabilities of Confederate [Union] victories. Battles under analysis are those where the total forces on both sides are at or above the median in our dataset. Bootstrapped standard errors in parentheses. Two-tailed tests: ∗∗∗ p < 0.01, ∗∗ p < 0.05, ∗ p < 0.1.",
        custom.model.names = c("Model C-35", "Model C-36", "Model C-37",
                               "Model C-38", "Model C-39"),
        caption = "Table C-9: Latent Traits and Battle Outcomes (Larger Battles)",
        caption.above = TRUE,
        booktabs = TRUE, dcolumn  = TRUE, table = FALSE,  use.packages = FALSE)

htmlreg(file = file.path(DIR_OUTPUT, "ACH-Table-C10.html"),
        list(extract.glm.boot(LER.mod.1.large, boot = LER.mod.1.large.boot.out, nullmod = LER.mod.0.large),
             extract.glm.boot(LER.mod.2.large, boot = LER.mod.2.large.boot.out, nullmod = LER.mod.0.large),
             extract.glm.boot(LER.mod.3.large, boot = LER.mod.3.large.boot.out, nullmod = LER.mod.0.large),
             extract.glm.boot(LER.mod.4.large, boot = LER.mod.4.large.boot.out, nullmod = LER.mod.0.large),
             extract.glm.boot(LER.mod.5.large, boot = LER.mod.5.large.boot.out, nullmod = LER.mod.0.large)),
        omit.coef = "(genFE)",
        stars = c(0.01, 0.05, 0.1),
        digits = 3, include.thresholds = TRUE,
        custom.coef.names = LER.coef.names,
        custom.note = "Note: Quasi-binomial logistic coefficients presented. Observations at the battle level. The dependent variable is the proportion of battle casualties from the Confederacy. Positive [negative] coefficients indicate the covariate is associated with higher [lower] ratios of Confederate casualties. Battles under analysis are those where the total forces on both sides are at or above the median in our dataset. Bootstrapped standard errors in parentheses. Two-tailed tests: ∗∗∗ p < 0.01, ∗∗ p < 0.05, ∗ p < 0.1.",
        custom.model.names = c("Model C-40", "Model C-41", "Model C-42",
                               "Model C-43", "Model C-44"),
        caption = "Table C-10: Latent Traits and Relative Confederate Casualty Rates (Larger Battles)",
        caption.above = TRUE,
        booktabs = TRUE, dcolumn  = TRUE, table = FALSE,  use.packages = FALSE,
        reorder.coef = c(2:11, 1))

htmlreg(file = file.path(DIR_OUTPUT, "ACH-Table-C11.html"),
        list(extract.lrm(BS.mod.1.cross),
             extract.lrm(BS.mod.2.cross),
             extract.lrm(BS.mod.3.cross),
             extract.lrm(BS.mod.4.cross),
             extract.lrm(BS.mod.5.cross),
             extract.lrm(BS.mod.6.cross)),
        omit.coef = "(S1|S2|S3|S4|S5|S6|S7|S8|S9|S0)",
        stars = c(0.01, 0.05, 0.1),
        digits = 3, include.thresholds = TRUE,
        custom.coef.names = BS.coef.names.crossover,
        custom.note = "Note: Ordered logit coefficients presented. Observations are at the battle level. “Crossover” variables indicate the battle-level mean competence for “crossover” commanders of the indicated side. The dependent variable is an ordered factor with the levels being, in order, Confederate Victory, Inconclusive, and Union Victory. Negative [positive] coefficients indicate the covariate is associated with higher probabilities of Confederate [Union] victories. Battles under analysis are those where the total forces on both sides are at or above the median in our dataset. Model C-50 includes commander-level fixed effects for those who fought in at least seven battles. Bootstrapped standard errors in parentheses. Two-tailed tests: ∗∗∗ p < 0.01, ∗∗ p < 0.05, ∗ p < 0.1.", 
        reorder.coef = c(3:8,9:16,2,1),
        caption.above = TRUE,
        caption = "Table C-11: Latent Traits and Battle Outcomes (With “Crossover” Variables)",
        custom.model.names = c("Model C-45", "Model C-46", "Model C-47",
                               "Model C-48", "Model C-49", "Model C-50"),
        booktabs = TRUE, dcolumn  = TRUE, table = FALSE,  use.packages = FALSE)

htmlreg(file = file.path(DIR_OUTPUT, "ACH-Table-C12.html"),
        list(extract.glm.boot(LER.mod.1.cross, boot = LER.mod.1.cross.boot.out, nullmod = LER.mod.0),
             extract.glm.boot(LER.mod.2.cross, boot = LER.mod.2.cross.boot.out, nullmod = LER.mod.0),
             extract.glm.boot(LER.mod.3.cross, boot = LER.mod.3.cross.boot.out, nullmod = LER.mod.0),
             extract.glm.boot(LER.mod.4.cross, boot = LER.mod.4.cross.boot.out, nullmod = LER.mod.0),
             extract.glm.boot(LER.mod.5.cross, boot = LER.mod.5.cross.boot.out, nullmod = LER.mod.0),
             extract.glm.boot(LER.mod.6.cross, boot = LER.mod.6.cross.boot.out, nullmod = LER.mod.0)),
        omit.coef = "(genFE)",
        stars = c(0.01, 0.05, 0.1), 
        digits = 3, include.thresholds = TRUE,
        custom.coef.names = LER.coef.names.crossover,
        caption = "Table C-12: Latent Traits and Relative Confederate Casualty Rates (With “Crossover” Variables)",
        caption.above = TRUE,
        custom.note = "Note: Quasi-binomial logistic coefficients presented. Observations at the battle level. “Crossover” variables indicate the battle-level mean competence for “crossover” commanders of the indicated side. The dependent variable is the proportion of battle casualties from the Confederacy. Positive [negative] coefficients indicate the covariate is associated with higher [lower] ratios of Confederate casualties. Battles under analysis are those where the total forces on both sides are at or above the median in our dataset. Model C-56 includes commander-level fixed effects for those who fought in at least seven battles. Bootstrapped standard errors in parentheses. Two-tailed tests: ∗∗∗ p < 0.01, ∗∗ p < 0.05, ∗ p < 0.1.",
        custom.model.names = c("Model C-51", "Model C-52", "Model C-53",
                               "Model C-54", "Model C-55", "Model C-56"),
        booktabs = TRUE, dcolumn  = TRUE, table = FALSE,  use.packages = FALSE,
        reorder.coef = c(2:15, 1))



### Appendix D: Political Generals
# Wikipedia accessed November 27, 2017
# Figure D-1 (a and b)
sink("NULL")
set.seed(10)
polgens <- read.csv("data/ACH-political_generals.csv")
colnames(polgens)[3] <- "wiki_polgen"
polgens$wiki_polgen <- as.numeric(!is.na(polgens$wiki_polgen))
polgens$work_polgen <- as.numeric(!is.na(polgens$work_polgen))
colnames(lifetime_means_loyal) <- c("comm_id", "loyalty", "confederate")
lifetime_means <- merge(lifetime_means_loyal,polgens)
wikigen.mod <-bootcov2(lrm(wiki_polgen ~ loyalty,
                          data = lifetime_means,
                          x = TRUE, y = TRUE),
                      B = 1000)
workgen.mod <-bootcov2(lrm(work_polgen ~ loyalty,
                          data = lifetime_means[grep("US", lifetime_means$comm_id),],
                          x = TRUE, y = TRUE),
                      B = 1000)
sink()

wikigen.newcoefs <- rmvnorm(10000, mean = c(wikigen.mod$coefficients),
                            sigma = vcov(wikigen.mod))
workgen.newcoefs <- rmvnorm(10000, mean = c(workgen.mod$coefficients),
                            sigma = vcov(workgen.mod))
polgen.frame <- data.frame("(Intercept)" = c(1,1),
                           loyalty = c(mean(lifetime_means$loyalty) - sd(lifetime_means$loyalty),
                                       mean(lifetime_means$loyalty) + sd(lifetime_means$loyalty)))
wikigen.prob.nonpol <- plogis(as.matrix(polgen.frame[1,]) %*% t(wikigen.newcoefs))
wikigen.prob.pol <- plogis(as.matrix(polgen.frame[2,]) %*% t(wikigen.newcoefs))
workgen.prob.nonpol <- plogis(as.matrix(polgen.frame[1,]) %*% t(workgen.newcoefs))
workgen.prob.pol <- plogis(as.matrix(polgen.frame[2,]) %*% t(workgen.newcoefs))
wikigen.predframe <- data.frame(low.95 = quantile(wikigen.prob.pol - wikigen.prob.nonpol, 0.025),
                                low.90 = quantile(wikigen.prob.pol - wikigen.prob.nonpol, 0.05),
                                pred = mean(wikigen.prob.pol - wikigen.prob.nonpol),
                                high.90 = quantile(wikigen.prob.pol - wikigen.prob.nonpol, 0.95),
                                high.95 = quantile(wikigen.prob.pol - wikigen.prob.nonpol, 0.975),
                                Source = "Wikipedia (2017)")
workgen.predframe <- data.frame(low.95 = quantile(workgen.prob.pol - workgen.prob.nonpol, 0.025),
                                low.90 = quantile(workgen.prob.pol - workgen.prob.nonpol, 0.05),
                                pred = mean(workgen.prob.pol - workgen.prob.nonpol),
                                high.90 = quantile(workgen.prob.pol - workgen.prob.nonpol, 0.95),
                                high.95 = quantile(workgen.prob.pol - workgen.prob.nonpol, 0.975),
                                Source = "Work (2012;\nUnion Only)")

polgen.predframe <- rbind(wikigen.predframe, workgen.predframe)

polgen.mfx.plot <- ggplot(polgen.predframe, aes(x = Source, y = pred, shape = Source, color = Source)) +
  geom_point(position=position_dodge(width=0.5)) +
  geom_errorbar(aes(ymin = low.90, ymax = high.90),
                position=position_dodge(width=0.5),
                width = 0, size = 1) +
  geom_errorbar(aes(ymin = low.95, ymax = high.95),
                position=position_dodge(width=0.5),
                width = 0) +
  coord_flip() +
  theme_bw() +
  geom_hline(yintercept = 0, linetype = "dotted") +
  xlab('Source List\n') +
  ylab('\nPredicted Change in Probability\nof Being a Political General') +
  scale_color_grey(name = "Source",
                   start = 0, end = 0.6) +
  scale_shape_discrete(name = "Source List") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(colour = FALSE,
         shape = FALSE)


ggsave(polgen.mfx.plot, file = file.path(DIR_FIGURES, "ACH-Figure-D1a.eps"), 
       device = cairo_ps, width = 4, height = 3)

new_lifetime_means <- rbind(data.frame(subset(lifetime_means[grep("US", lifetime_means$comm_id),],
                                              select = -c(wiki_polgen, work_polgen)),
                                       polgen = lifetime_means[grep("US", lifetime_means$comm_id),]$work_polgen,
                                       Source = "Work (2012; Union Only)"),
                            data.frame(subset(lifetime_means, select = -c(wiki_polgen, work_polgen)),
                                       polgen = lifetime_means$wiki_polgen,
                                       Source = "Wikipedia (2017)"))

data_summary <- function(x) {
  m <- mean(x)
  ymin <- m-sd(x)
  ymax <- m+sd(x)
  return(c(y=m,ymin=ymin,ymax=ymax))
}

polgen.violin <- qplot(data = new_lifetime_means,
                       x = factor(polgen),
                       y = scale(loyalty), geom = "violin", fill = I(NA)) +
  theme_bw() + facet_wrap(~Source) +
  coord_flip() +
  ylab('\nMean Lifetime Loyalty\n(Number of Standard Deviations from Mean)') +
  scale_x_discrete(breaks = c(0,1), labels = c("No", "Yes"))  +
  theme(legend.position="none") + xlab('Listed as Political General?\n') +
  stat_summary(fun.data=data_summary)

ggsave(polgen.violin, file = file.path(DIR_FIGURES, "ACH-Figure-D1b.eps"), 
       device = cairo_ps, width = 5, height = 3.5)



### Appendix E: Comparing battles lost due to not having commander data
newbattles$dropped <- 1 - as.numeric(newbattles$battle %in% na.omit(battles_merged_singleentry)$battle)

sink("NULL")
set.seed(10)
lost.mod <- bootcov2(lrm(dropped ~ I(casualties_U/1000) + I(strength_U/1000) + 
                          I(casualties_C/1000) + I(strength_C/1000) + outcome +
                          significance + theater + surrender + confed_battle + attacker + 
                          duration + commander_rank_C_number + commander_rank_U_number, 
                        data = newbattles, x = TRUE, y = TRUE), B = 1000)

lost.mod.min <- bootcov2(lrm(dropped ~ I(casualties_U_min/1000) + I(strength_U_min/1000) + 
                              I(casualties_C_min/1000) + I(strength_C_min/1000) + outcome +
                              significance + theater + surrender + confed_battle + attacker + 
                              duration + commander_rank_C_number + commander_rank_U_number, 
                            data = newbattles, x = TRUE, y = TRUE), B = 1000)

lost.mod.max <- bootcov2(lrm(dropped ~ I(casualties_U_max/1000) + I(strength_U_max/1000) + 
                              I(casualties_C_max/1000) + I(strength_C_max/1000) + outcome +
                              significance + theater + surrender + confed_battle + attacker + 
                              duration + commander_rank_C_number + commander_rank_U_number, 
                            data = newbattles, x = TRUE, y = TRUE), B = 1000)
sink()

lost.coef.names <- c("Constant", "Union Casualties", "Union Strength",
                     "Confederate Casualties", "Confederate Strength",
                     "Inconclusive Outcome", "Confederate Victory",
                     "Major Battle", "Formative Battle", "Limited Battle",
                     "Eastern Theater", "Western Theater",
                     "Trans-Mississippi Theater", "No Surrender",
                     "Union Surrender", "Confederate Battle",
                     "Union Attacker", "Battle Duration", 
                     "Top Confederate Commander Rank",
                     "Top Union Commander Rank",
                     "Union Casualties-Low", "Union Strength-Low",
                     "Confederate Casualties-Low", "Confederate Strength-Low",
                     "Union Casualties-High", "Union Strength-High",
                     "Confederate Casualties-High", "Confederate Strength-High")

# Table E-1
htmlreg(file = file.path(DIR_OUTPUT, "ACH-Table-E1.html"),
        list(extract.lrm(lost.mod),
             extract.lrm(lost.mod.min),
             extract.lrm(lost.mod.max)),
        stars = c(0.01, 0.05, 0.1),
        digits = 3, include.thresholds = TRUE,
        custom.coef.names = lost.coef.names,
        reorder.coef = c(2:28, 1),
        custom.model.names = c("Geometric Mean",
                               "5th Percentile",
                               "95th Percentile"),
        custom.note = "Note: Logistic coefficients presented. Observations are at the battle level. The dependent variable equals one if the battle was dropped from our main analyses due to missing commander-level data. Negative [positive] coefficients indicate the covariate is associated with higher probabilities of being dropped. The “Geometric Mean” column uses the strength and casualty estimates based on the geometric mean of imputed values; the 5th and 95th percentile columns are analogously defined. Bootstrapped standard errors in parentheses. Two-tailed tests: ∗∗∗ p < 0.01, ∗∗ p < 0.05, ∗ p < 0.1.",
        caption = "Table E-1: Predictors of Dropped Battles",
        caption.above = TRUE,
        booktabs = TRUE, dcolumn  = TRUE, table = FALSE,  use.packages = FALSE)


# Clean up files we created to use outside programs
unlink(file.path(DIR_CODE,"ACH-Conversion.do"))
unlink("header.txt")
unlink("NULL")
unlink(file.path(DIR_CODE,"ACH-MIMIC-NoBootstrap.inp"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-NoBootstrapMin.inp"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-NoBootstrapMax.inp"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-Bootstrap.inp"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-BootstrapMin.inp"))
unlink(file.path(DIR_CODE,"ACH-MIMIC-BootstrapMax.inp"))

